This is an R Markdown
Notebook. When you execute code within the notebook, the results appear
beneath the code.
Try executing this chunk by clicking the Run button within
the chunk or by placing your cursor inside it and pressing
Cmd+Shift+Enter.
plot(cars)

Add a new chunk by clicking the Insert Chunk button on the
toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and
output will be saved alongside it (click the Preview button or
press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the
editor. Consequently, unlike Knit, Preview does not
run any R code chunks. Instead, the output of the chunk when it was last
run in the editor is displayed.
# Make the Downloads folder the working directory; the relative file names
# passed to write.csv() below are resolved against it.
# NOTE(review): hard-coded, user-specific absolute path -- confirm before
# running this on another machine.
setwd(dir = "/Users/jayshreenohar/Downloads")
##Merge datasets
# NOTE(review): df_Johnson_2017 is displayed here but is not created in this
# part of the file -- presumably it already exists in the session; verify.
df_Johnson_2017
# Nasdaq, 2017-2018: left join on `date` (all.x = TRUE keeps every row of
# the first table), exported and displayed.
df_NASDAQ <- merge(
  x = Tweets_Nasdaq, y = NASDAQ_new,
  by = "date", all.x = TRUE
)
write.csv(x = df_NASDAQ, file = "Nasdaq_2017", row.names = FALSE)
df_NASDAQ
# Autodesk, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_autodesk <- merge(
  x = Autodesk_Tweets, y = Autodesk_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_autodesk, file = "autodesk_2018", row.names = FALSE)
df_autodesk
# Hasbro: per-year left join on `date` (all.x = TRUE keeps every row of the
# first table); each year is exported to its own file, then both are shown.
# 2018
df_Hasbro_2018 <- merge(Hasbro_Tweets_2018, Hasbro_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Hasbro_2018, file = "Hasbro_2018", row.names = FALSE)
# 2016 -- written to "Hasbro_2016" so it does not overwrite the 2018 export.
df_Hasbro_2016 <- merge(Hasbro_Tweets_2016, Hasbro_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Hasbro_2016, file = "Hasbro_2016", row.names = FALSE)
df_Hasbro_2016
df_Hasbro_2018
# Intel: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2018
df_Intel_2018 <- merge(
  x = Intel_Tweets_2018, y = Intel_2018_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_Intel_2018, file = "Intel_2018", row.names = FALSE)
# 2016
df_Intel_2016 <- merge(
  x = Intel_Tweets_2016, y = Intel_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Intel_2016, file = "Intel_2016", row.names = FALSE)
df_Intel_2016
df_Intel_2018
# Activision: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Activision_2016 <- merge(
  x = Activision_Tweets_2016, y = Activision_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Activision_2016, file = "Activision_2016", row.names = FALSE)
# 2018
df_Activision_2018 <- merge(
  x = Activision_Tweets_2018, y = Activision_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Activision_2018, file = "Activision_2018", row.names = FALSE)
df_Activision_2016
df_Activision_2018
# Intuit (objects are spelled "Inuit"): per-year left join on `date`
# (all.x = TRUE), exported and displayed.
# 2016
df_Inuit_2016 <- merge(
  x = Tweets_Inuit_2016, y = Inuit_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Inuit_2016, file = "Intuit_2016", row.names = FALSE)
# 2018
df_Inuit_2018 <- merge(
  x = Tweets_Inuit_2018, y = Intuit_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Inuit_2018, file = "Intuit_2018", row.names = FALSE)
df_Inuit_2016
df_Inuit_2018
# Allergan: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Allergan_2016 <- merge(
  x = Allergan_Tweets_2016, y = Allergan_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Allergan_2016, file = "Allergan_2016", row.names = FALSE)
# 2018
df_Allergan_2018 <- merge(
  x = Allergan_Tweets_2018, y = Allergan_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Allergan_2018, file = "Allergan_2018", row.names = FALSE)
df_Allergan_2016
df_Allergan_2018
# Humana: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Humana_2016 <- merge(
  x = Humana_Tweets_2016, y = Humana_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Humana_2016, file = "Humana_2016", row.names = FALSE)
# 2018
df_Humana_2018 <- merge(
  x = Humana_2018_Tweets, y = Humana_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Humana_2018, file = "Humana_2018", row.names = FALSE)
df_Humana_2016
df_Humana_2018
# Autodesk_Carl_Bass: per-year left join on `date` (all.x = TRUE), exported
# and displayed.
# 2016
df_CB_autodesk_2016 <- merge(
  x = Autodesk_CB_tweets_2016, y = Autodesk_CB_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_CB_autodesk_2016, file = "Autodesk_CB_2016", row.names = FALSE)
# 2018
# NOTE(review): the result and its file are labelled 2018, but both inputs
# are named 2017 -- confirm which year this data covers.
df_CB_autodesk_2018 <- merge(
  x = Autodesk_CB_tweets_2017, y = Autodesk_2017_SP_CB,
  by = "date", all.x = TRUE
)
write.csv(x = df_CB_autodesk_2018, file = "Autodesk_CB_2018", row.names = FALSE)
df_CB_autodesk_2016
df_CB_autodesk_2018
# Equinox, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_equinox_2018 <- merge(
  x = Equinox_2018_Tweets, y = Equinox_2018_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_equinox_2018, file = "Equinox_2018", row.names = FALSE)
df_equinox_2018
# Cisco: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_cisco_2016 <- merge(
  x = Cisco_Tweets_2016, y = Cisco_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_cisco_2016, file = "Cisco_2016", row.names = FALSE)
# 2018
df_cisco_2018 <- merge(
  x = Cisco_Tweets_2018, y = Cisco_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_cisco_2018, file = "Cisco_2018", row.names = FALSE)
df_cisco_2016
df_cisco_2018
# EBAY: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_EBAY_2016 <- merge(
  x = EBAY_Tweets_2016, y = EBAY_2016_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_EBAY_2016, file = "EBAY_2016", row.names = FALSE)
# 2018
df_EBAY_2018 <- merge(
  x = EBAY_Tweets_2018, y = EBAY_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_EBAY_2018, file = "EBAY_2018", row.names = FALSE)
df_EBAY_2016
df_EBAY_2018
# Davita: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Davita_2016 <- merge(
  x = Tweets_Davita_2016, y = Davita_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Davita_2016, file = "Davita_2016", row.names = FALSE)
# 2017
df_Davita_2017 <- merge(
  x = Davita_Inc_Tweets_2017, y = Davita_SP_2017,
  by = "date", all.x = TRUE
)
write.csv(x = df_Davita_2017, file = "Davita_2017", row.names = FALSE)
df_Davita_2016
df_Davita_2017
# Illumina: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Illumina_2016 <- merge(
  x = Illumina_Tweets_2016, y = Illumina_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Illumina_2016, file = "Illumina_2016", row.names = FALSE)
# 2018 (inputs are named 2017_2018)
df_Illumina_2018 <- merge(
  x = Illumina_Tweets_2017_2018, y = Illumina_2017_2018_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_Illumina_2018, file = "Illumina_2018", row.names = FALSE)
df_Illumina_2016
df_Illumina_2018
# HomeDepot, 2013-2014: left join on `date` (all.x = TRUE keeps every row of
# Home_depot_tweets_2013), exported and displayed.
df_Homedepot_2013 <- merge(Home_depot_tweets_2013, Home_Depot_SP, by = "date", all.x = TRUE)
# row.names = FALSE, like every other export in this file, so the CSV does
# not gain an extra leading column of row numbers.
write.csv(df_Homedepot_2013, file = "Homedepot_2013", row.names = FALSE)
df_Homedepot_2013
# Southwest_Airlines: per-year left join on `date` (all.x = TRUE), exported
# and displayed.
# 2016
df_Southwest_2016 <- merge(
  x = Southwest_Tweets_2016, y = Southwest_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Southwest_2016, file = "Southwest_2016", row.names = FALSE)
# 2018
df_Southwest_2018 <- merge(
  x = Southwest_Tweets_2018, y = Southwest_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Southwest_2018, file = "Southwest_2018", row.names = FALSE)
df_Southwest_2016
df_Southwest_2018
# FIS: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_FIS_2016 <- merge(
  x = FIS_Tweets_2016, y = FIS_2016_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_FIS_2016, file = "FIS_2016", row.names = FALSE)
# 2018
df_FIS_2018 <- merge(
  x = FIS_Tweets_2018, y = FIS_2018_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_FIS_2018, file = "FIS_2018", row.names = FALSE)
df_FIS_2016
df_FIS_2018
# Leucadia_Nation: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Leucadia_2016 <- merge(
  x = Leucadia_Tweets_2016, y = Leucadia_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Leucadia_2016, file = "Leucadia_2016", row.names = FALSE)
# 2018
df_Leucadia_2018 <- merge(
  x = Leucadia_2018_Tweets, y = Leucadia_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Leucadia_2018, file = "Leucadia_2018", row.names = FALSE)
df_Leucadia_2016
df_Leucadia_2018
# Verizon, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_Verizon_2018 <- merge(
  x = Verizon_Tweets_2018, y = Verizon_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Verizon_2018, file = "Verizon_2018", row.names = FALSE)
df_Verizon_2018
# Western Union: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2018
df_WU_2018 <- merge(
  x = Western_Union_Tweets_2018, y = WU_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_WU_2018, file = "WU_2018", row.names = FALSE)
# 2016
df_WU_2016 <- merge(
  x = Western_Union_Tweets_2016, y = WU_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_WU_2016, file = "WU_2016", row.names = FALSE)
df_WU_2016
df_WU_2018
# Red_Hat: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_RedHat_2016 <- merge(
  x = Red_Hat_Tweets_2016, y = Red_Hat_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_RedHat_2016, file = "RedHat_2016", row.names = FALSE)
# 2018
df_RedHat_2018 <- merge(
  x = Red_Hat_Tweets_2018, y = Red_Hat_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_RedHat_2018, file = "RedHat_2018", row.names = FALSE)
df_RedHat_2016
df_RedHat_2018
# AMZN: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_AMZN_2016 <- merge(
  x = AMZN_Tweets_2016, y = AMZN_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_AMZN_2016, file = "AMZN_2016", row.names = FALSE)
# 2018
df_AMZN_2018 <- merge(
  x = AMZN_Tweets_2018, y = AMZN_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_AMZN_2018, file = "AMZN_2018", row.names = FALSE)
df_AMZN_2016
df_AMZN_2018
# GE: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_GE_2016 <- merge(
  x = GE_Tweets_2016, y = GE_2016_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_GE_2016, file = "GE_2016", row.names = FALSE)
# 2017
df_GE_2017 <- merge(
  x = GE_Tweets_2017, y = GE_2017_SP,
  by = "date", all.x = TRUE
)
write.csv(x = df_GE_2017, file = "GE_2017", row.names = FALSE)
df_GE_2016
df_GE_2017
# Fiserv: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Fiserv_2016 <- merge(
  x = Fiserv_Tweets_2016, y = Fiserv_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Fiserv_2016, file = "Fiserv_2016", row.names = FALSE)
# 2018
df_Fiserv_2018 <- merge(
  x = Fiserv_Tweets_2018, y = Fiserv_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Fiserv_2018, file = "Fiserv_2018", row.names = FALSE)
df_Fiserv_2016
df_Fiserv_2018
# Waste_Management, 2018: left join on `date` (all.x = TRUE keeps every row
# of the first table), exported and displayed.
df_WM_2018 <- merge(
  x = WM_Tweets_2018, y = WM_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_WM_2018, file = "WM_2018", row.names = FALSE)
df_WM_2018
# Wills_Tower: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Wills_2016 <- merge(
  x = Wills_Tower_Tweets_2016, y = Wills_Tower_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Wills_2016, file = "Wills_2016", row.names = FALSE)
# 2018
df_Wills_2018 <- merge(
  x = Wills_Tower_Tweets_2018, y = WLTW_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Wills_2018, file = "Wills_2018", row.names = FALSE)
df_Wills_2016
df_Wills_2018
# Tripadvisor: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_tripadvisor_2016 <- merge(
  x = Tripadvisor_Tweets_2016, y = Tripadvisor_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_tripadvisor_2016, file = "Tripadvisor_2016", row.names = FALSE)
# 2018
df_tripadvisor_2018 <- merge(
  x = Tripadvisor_Tweets_2018, y = Tripadvisor_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_tripadvisor_2018, file = "Tripadvisor_2018", row.names = FALSE)
df_tripadvisor_2016
df_tripadvisor_2018
# Davita_Kent: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_DavitaK_2016 <- merge(
  x = Davita_Tweets_2016K, y = Davita_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_DavitaK_2016, file = "Davita_K_2016", row.names = FALSE)
# 2018
df_DavitaK_2018 <- merge(
  x = Davita_Tweets_2018K, y = Davita_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_DavitaK_2018, file = "Davita_K_2018", row.names = FALSE)
df_DavitaK_2016
df_DavitaK_2018
# Starbucks: left join on `date` (all.x = TRUE keeps every row of the first
# table), exported and displayed.
df_Starbucks_2018 <- merge(
  x = Starbucks_Tweets_2018, y = Starbucks_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Starbucks_2018, file = "Starbucks_2018", row.names = FALSE)
df_Starbucks_2018
# McCormick, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_McCormick_2018 <- merge(
  x = McCormick_Tweets_2018, y = McCormick_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_McCormick_2018, file = "McCormick_2018", row.names = FALSE)
df_McCormick_2018
# IHS Markit, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_IHS_2018 <- merge(
  x = IHS_Tweets_2018, y = IHS_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_IHS_2018, file = "IHS_2018", row.names = FALSE)
df_IHS_2018
# AMD, 2018: left join on `date` (all.x = TRUE keeps every row of the first
# table), exported and displayed.
df_AMD_2018 <- merge(
  x = AMD_Tweets_2018, y = AMD_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_AMD_2018, file = "AMD_2018", row.names = FALSE)
df_AMD_2018
# ResMed: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_ResMed_2016 <- merge(
  x = ResMed_Tweets_2016, y = ResMed_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_ResMed_2016, file = "ResMed_2016", row.names = FALSE)
# 2018
df_ResMed_2018 <- merge(
  x = ResMed_Tweets_2018, y = ResMed_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_ResMed_2018, file = "ResMed_2018", row.names = FALSE)
df_ResMed_2016
df_ResMed_2018
# CA: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_CA_2016 <- merge(
  x = CA_Tweets_2016, y = CA_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_CA_2016, file = "CA_2016", row.names = FALSE)
# 2018
df_CA_2018 <- merge(
  x = CA_Tweets_2018, y = CA_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_CA_2018, file = "CA_2018", row.names = FALSE)
df_CA_2016
df_CA_2018
# General_Motors: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_GM_2016 <- merge(
  x = GM_Tweets_2016, y = GM_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_GM_2016, file = "GM_2016", row.names = FALSE)
# 2018
df_GM_2018 <- merge(
  x = GM_Tweets_2018, y = GM_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_GM_2018, file = "GM_2018", row.names = FALSE)
df_GM_2016
df_GM_2018
# Aetna: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Aetna_2016 <- merge(
  x = Aetna_Tweets_2016, y = AET_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Aetna_2016, file = "Aetna_2016", row.names = FALSE)
# 2018
df_Aetna_2018 <- merge(
  x = Aetna_Tweets_2018, y = Aetna_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Aetna_2018, file = "Aetna_2018", row.names = FALSE)
df_Aetna_2016
df_Aetna_2018
# NRG: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_NRG_2016 <- merge(
  x = NRG_Tweets_2016, y = NRG_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_NRG_2016, file = "NRG_2016", row.names = FALSE)
# 2018
df_NRG_2018 <- merge(
  x = NRG_Tweets_2018, y = NRG_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_NRG_2018, file = "NRG_2018", row.names = FALSE)
df_NRG_2016
df_NRG_2018
# Medtronic: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Medtronic_2016 <- merge(
  x = Medtronic_Tweets_2016, y = Medtronic_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Medtronic_2016, file = "Medtronic_2016", row.names = FALSE)
# 2018
df_Medtronic_2018 <- merge(
  x = Medtronic_Tweets_2018, y = Medtronic_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Medtronic_2018, file = "Medtronic_2018", row.names = FALSE)
df_Medtronic_2016
df_Medtronic_2018
# Juniper: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Juniper_2016 <- merge(
  x = Juniper_Tweets_2016, y = Juniper_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Juniper_2016, file = "Juniper_2016", row.names = FALSE)
# 2018
df_Juniper_2018 <- merge(
  x = Juniper_Tweets_2018, y = Juniper_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Juniper_2018, file = "Juniper_2018", row.names = FALSE)
df_Juniper_2016
df_Juniper_2018
# Aetna_Ramon, 2018: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_AetnaR_2018 <- merge(
  x = Aetna_Tweets_2018R, y = AetnaR_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_AetnaR_2018, file = "AetnaR_2018", row.names = FALSE)
df_AetnaR_2018
# Netflix, 2016: left join on `date` (all.x = TRUE keeps every row of the
# first table), exported and displayed.
df_Netflix_2016 <- merge(
  x = Netflix_Tweets_2016, y = Netflix_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Netflix_2016, file = "Netflix_2016", row.names = FALSE)
df_Netflix_2016
# Disney: left join on `date` (all.x = TRUE keeps every row of the first
# table), exported and displayed.
df_Disney_2018 <- merge(
  x = Disney_Tweets_2018, y = Disney_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Disney_2018, file = "Disney_2018", row.names = FALSE)
df_Disney_2018
# FOX, 2015: left join on `date` (all.x = TRUE keeps every row of the first
# table), exported and displayed.
df_Fox_2015 <- merge(
  x = Fox_Tweets_2015, y = Fox_SP_2015,
  by = "date", all.x = TRUE
)
write.csv(x = df_Fox_2015, file = "Fox_2015", row.names = FALSE)
df_Fox_2015
# Microsoft: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Microsoft_2016 <- merge(
  x = Microsoft_Tweets_2016, y = Microsoft_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Microsoft_2016, file = "Microsoft_2016", row.names = FALSE)
# 2018
df_Microsoft_2018 <- merge(
  x = Microsoft_Tweets_2018, y = Microsoft_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Microsoft_2018, file = "Microsoft_2018", row.names = FALSE)
df_Microsoft_2016
df_Microsoft_2018
# Juniper_Shaygan, 2014: left join on `date` (all.x = TRUE keeps every row of
# JuniperS_Tweets_2014), exported and displayed.
df_Juniper_S_2014 <- merge(JuniperS_Tweets_2014, Juniper_SP_2014, by = "date", all.x = TRUE)
write.csv(df_Juniper_S_2014, file = "Juniper_S_2014", row.names = FALSE)
df_Juniper_S_2014
# This section performs no 2016 merge, so df_Juniper_S_2016 is displayed
# only when it already exists; otherwise the run would stop here with
# "object not found".
if (exists("df_Juniper_S_2016")) {
  df_Juniper_S_2016
} else {
  message("df_Juniper_S_2016 has not been created; nothing to display.")
}
# Synchrony Financial: per-year left join on `date` (all.x = TRUE), exported
# and displayed.
# 2016
df_SYFM_2016 <- merge(
  x = SYFM_Tweets_2016, y = SYF_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_SYFM_2016, file = "SYFM_2016", row.names = FALSE)
# 2018
df_SYFM_2018 <- merge(
  x = SYFM_Tweets_2018, y = SYFM_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_SYFM_2018, file = "SYFM_2018", row.names = FALSE)
df_SYFM_2016
df_SYFM_2018
# Southern Company: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Southern_2016 <- merge(
  x = SouthernC_Tweets_2016, y = SO_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Southern_2016, file = "Southern_2016", row.names = FALSE)
# 2018
df_Southern_2018 <- merge(
  x = SouthernC_Tweets_2018, y = SO_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Southern_2018, file = "Southern_2018", row.names = FALSE)
df_Southern_2016
df_Southern_2018
# Apple: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Apple_2016 <- merge(
  x = Apple_Tweets_2016, y = Apple_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Apple_2016, file = "Apple_2016", row.names = FALSE)
# 2018
df_Apple_2018 <- merge(
  x = Apple_Tweets_2018, y = Apple_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Apple_2018, file = "Apple_2018", row.names = FALSE)
df_Apple_2016
df_Apple_2018
# XL: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_XL_2016 <- merge(
  x = XL_Tweets_2016, y = XL_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_XL_2016, file = "XL_2016", row.names = FALSE)
# 2018
df_XL_2018 <- merge(
  x = XL_Tweets_2018, y = XL_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_XL_2018, file = "XL_2018", row.names = FALSE)
df_XL_2016
df_XL_2018
# Tysonfoods: per-year left join on `date` (all.x = TRUE), exported and
# displayed.
# 2016
df_Tysonf_2016 <- merge(
  x = TSN_Tweets_2016, y = TSN_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Tysonf_2016, file = "TSN_2016", row.names = FALSE)
# 2018
df_Tysonf_2018 <- merge(
  x = TSN_Tweets_2018, y = TSN_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Tysonf_2018, file = "TSN_2018", row.names = FALSE)
df_Tysonf_2016
df_Tysonf_2018
# Akamai: per-year left join on `date` (all.x = TRUE), exported and displayed.
# 2016
df_Akamai_2016 <- merge(
  x = Akamai_Tweets_2016, y = Akamai_SP_2016,
  by = "date", all.x = TRUE
)
write.csv(x = df_Akamai_2016, file = "Akamai_2016", row.names = FALSE)
# 2018
df_Akamai_2018 <- merge(
  x = Akamai_Tweets_2018, y = Akamai_SP_2018,
  by = "date", all.x = TRUE
)
write.csv(x = df_Akamai_2018, file = "Akamai_2018", row.names = FALSE)
df_Akamai_2016
df_Akamai_2018
###Clean
#Packages
library(tm)
library(stringr)
#' Clean raw tweet text
#'
#' Lower-cases the input, then strips URLs, @mentions, #hashtags, English
#' stop words, punctuation and digits, in that order.
#'
#' @param tweets Character vector of tweet texts.
#' @return Character vector of the same length as `tweets`. Whitespace left
#'   behind by removed tokens is not collapsed.
clean_tweets <- function(tweets) {
  tweets <- tolower(tweets)
  # Everything from "http", "@" or "#" up to the next whitespace is dropped.
  tweets <- stringr::str_replace_all(tweets, "http[^[:space:]]*", "")
  tweets <- stringr::str_replace_all(tweets, "@[^[:space:]]*", "")
  tweets <- stringr::str_replace_all(tweets, "#[^[:space:]]*", "")
  # Stop words are removed before punctuation is stripped: tm's English list
  # contains contractions such as "don't", which no longer match once the
  # apostrophe has been deleted.
  tweets <- tm::removeWords(tweets, tm::stopwords("en"))
  # NOTE(review): stringr uses ICU regex, where [[:punct:]] may not cover
  # symbol characters such as "$" or "+" -- confirm whether e.g. cashtags
  # should also be stripped here.
  tweets <- stringr::str_replace_all(tweets, "[[:punct:]]", "")
  tweets <- stringr::str_replace_all(tweets, "[[:digit:]]", "")
  tweets
}
# Lemmatize the text column of df_Test.
# NOTE(review): neither df_Test nor lemmatize_words() is defined in the
# visible part of this file, and only tm and stringr are attached above --
# presumably lemmatize_words() comes from the textstem package; confirm it is
# loaded. If so, check whether lemmatize_strings() (the whole-sentence
# variant) is what is intended for tweet text.
df_Test[["text"]] <- lemmatize_words(df_Test[["text"]])
# Numeric NA values & tweets
# Johnson 2016: drop columns by position, fill missing values, clean the text.
df_Johnson_2016 <- df_Johnson_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Johnson_2016
# Missing tickers become "JCI".
df_Johnson_2016$TICKER[is.na(df_Johnson_2016$TICKER)] <- "JCI"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Johnson_2016$PRC, na.rm = TRUE)
df_Johnson_2016$PRC[is.na(df_Johnson_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Johnson_2016$VOL, na.rm = TRUE)
df_Johnson_2016$VOL[is.na(df_Johnson_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Johnson_2016$BID, na.rm = TRUE)
df_Johnson_2016$BID[is.na(df_Johnson_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Johnson_2016$ASK, na.rm = TRUE)
df_Johnson_2016$ASK[is.na(df_Johnson_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Johnson_2016$OPENPRC, na.rm = TRUE)
df_Johnson_2016$OPENPRC[is.na(df_Johnson_2016$OPENPRC)] <- OPEN_AVG
df_Johnson_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Johnson_2016$text <- clean_tweets(df_Johnson_2016$text)
df_Johnson_2016$text <- lemmatize_words(df_Johnson_2016$text)
# Johnson 2017: drop columns by position, fill missing values, clean the text.
df_Johnson_2017 <- df_Johnson_2017[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Johnson_2017
# Missing tickers become "JCI".
df_Johnson_2017$TICKER[is.na(df_Johnson_2017$TICKER)] <- "JCI"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Johnson_2017$PRC, na.rm = TRUE)
df_Johnson_2017$PRC[is.na(df_Johnson_2017$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Johnson_2017$VOL, na.rm = TRUE)
df_Johnson_2017$VOL[is.na(df_Johnson_2017$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Johnson_2017$BID, na.rm = TRUE)
df_Johnson_2017$BID[is.na(df_Johnson_2017$BID)] <- BID_AVG
ASK_AVG <- mean(df_Johnson_2017$ASK, na.rm = TRUE)
df_Johnson_2017$ASK[is.na(df_Johnson_2017$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Johnson_2017$OPENPRC, na.rm = TRUE)
df_Johnson_2017$OPENPRC[is.na(df_Johnson_2017$OPENPRC)] <- OPEN_AVG
df_Johnson_2017
# Run the text column through clean_tweets(), then lemmatize_words().
df_Johnson_2017$text <- clean_tweets(df_Johnson_2017$text)
df_Johnson_2017$text <- lemmatize_words(df_Johnson_2017$text)
# NASDAQ: drop columns by position, fill missing values, clean the text.
df_NASDAQ <- df_NASDAQ[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_NASDAQ
# Missing tickers become "NDAQ".
df_NASDAQ$TICKER[is.na(df_NASDAQ$TICKER)] <- "NDAQ"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_NASDAQ$PRC, na.rm = TRUE)
df_NASDAQ$PRC[is.na(df_NASDAQ$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_NASDAQ$VOL, na.rm = TRUE)
df_NASDAQ$VOL[is.na(df_NASDAQ$VOL)] <- VOL_AVG
BID_AVG <- mean(df_NASDAQ$BID, na.rm = TRUE)
df_NASDAQ$BID[is.na(df_NASDAQ$BID)] <- BID_AVG
ASK_AVG <- mean(df_NASDAQ$ASK, na.rm = TRUE)
df_NASDAQ$ASK[is.na(df_NASDAQ$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_NASDAQ$OPENPRC, na.rm = TRUE)
df_NASDAQ$OPENPRC[is.na(df_NASDAQ$OPENPRC)] <- OPEN_AVG
df_NASDAQ
# Run the text column through clean_tweets(), then lemmatize_words().
df_NASDAQ$text <- clean_tweets(df_NASDAQ$text)
df_NASDAQ$text <- lemmatize_words(df_NASDAQ$text)
# Autodesk: drop columns by position, fill missing values, clean the text.
df_autodesk <- df_autodesk[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_autodesk
# Missing tickers become "ADSK".
df_autodesk$TICKER[is.na(df_autodesk$TICKER)] <- "ADSK"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_autodesk$PRC, na.rm = TRUE)
df_autodesk$PRC[is.na(df_autodesk$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_autodesk$VOL, na.rm = TRUE)
df_autodesk$VOL[is.na(df_autodesk$VOL)] <- VOL_AVG
BID_AVG <- mean(df_autodesk$BID, na.rm = TRUE)
df_autodesk$BID[is.na(df_autodesk$BID)] <- BID_AVG
ASK_AVG <- mean(df_autodesk$ASK, na.rm = TRUE)
df_autodesk$ASK[is.na(df_autodesk$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_autodesk$OPENPRC, na.rm = TRUE)
df_autodesk$OPENPRC[is.na(df_autodesk$OPENPRC)] <- OPEN_AVG
df_autodesk
# Run the text column through clean_tweets(), then lemmatize_words().
df_autodesk$text <- clean_tweets(df_autodesk$text)
df_autodesk$text <- lemmatize_words(df_autodesk$text)
# Hasbro 2016: drop columns by position, fill missing values, clean the text.
df_Hasbro_2016 <- df_Hasbro_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Hasbro_2016
# Missing tickers become "HAS".
df_Hasbro_2016$TICKER[is.na(df_Hasbro_2016$TICKER)] <- "HAS"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Hasbro_2016$PRC, na.rm = TRUE)
df_Hasbro_2016$PRC[is.na(df_Hasbro_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Hasbro_2016$VOL, na.rm = TRUE)
df_Hasbro_2016$VOL[is.na(df_Hasbro_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Hasbro_2016$BID, na.rm = TRUE)
df_Hasbro_2016$BID[is.na(df_Hasbro_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Hasbro_2016$ASK, na.rm = TRUE)
df_Hasbro_2016$ASK[is.na(df_Hasbro_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Hasbro_2016$OPENPRC, na.rm = TRUE)
df_Hasbro_2016$OPENPRC[is.na(df_Hasbro_2016$OPENPRC)] <- OPEN_AVG
df_Hasbro_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Hasbro_2016$text <- clean_tweets(df_Hasbro_2016$text)
df_Hasbro_2016$text <- lemmatize_words(df_Hasbro_2016$text)
# Hasbro 2018: drop columns by position, fill missing values, clean the text.
df_Hasbro_2018 <- df_Hasbro_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Hasbro_2018
# Missing tickers become "HAS".
df_Hasbro_2018$TICKER[is.na(df_Hasbro_2018$TICKER)] <- "HAS"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Hasbro_2018$PRC, na.rm = TRUE)
df_Hasbro_2018$PRC[is.na(df_Hasbro_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Hasbro_2018$VOL, na.rm = TRUE)
df_Hasbro_2018$VOL[is.na(df_Hasbro_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Hasbro_2018$BID, na.rm = TRUE)
df_Hasbro_2018$BID[is.na(df_Hasbro_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Hasbro_2018$ASK, na.rm = TRUE)
df_Hasbro_2018$ASK[is.na(df_Hasbro_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Hasbro_2018$OPENPRC, na.rm = TRUE)
df_Hasbro_2018$OPENPRC[is.na(df_Hasbro_2018$OPENPRC)] <- OPEN_AVG
df_Hasbro_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Hasbro_2018$text <- clean_tweets(df_Hasbro_2018$text)
df_Hasbro_2018$text <- lemmatize_words(df_Hasbro_2018$text)
# Intel 2016: drop columns by position, fill missing values, clean the text.
df_Intel_2016 <- df_Intel_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Intel_2016
# Missing tickers become "INTC".
df_Intel_2016$TICKER[is.na(df_Intel_2016$TICKER)] <- "INTC"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Intel_2016$PRC, na.rm = TRUE)
df_Intel_2016$PRC[is.na(df_Intel_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Intel_2016$VOL, na.rm = TRUE)
df_Intel_2016$VOL[is.na(df_Intel_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Intel_2016$BID, na.rm = TRUE)
df_Intel_2016$BID[is.na(df_Intel_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Intel_2016$ASK, na.rm = TRUE)
df_Intel_2016$ASK[is.na(df_Intel_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Intel_2016$OPENPRC, na.rm = TRUE)
df_Intel_2016$OPENPRC[is.na(df_Intel_2016$OPENPRC)] <- OPEN_AVG
df_Intel_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Intel_2016$text <- clean_tweets(df_Intel_2016$text)
df_Intel_2016$text <- lemmatize_words(df_Intel_2016$text)
# Intel 2018: drop columns by position, fill missing values, clean the text.
df_Intel_2018 <- df_Intel_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Intel_2018
# Missing tickers become "INTC".
df_Intel_2018$TICKER[is.na(df_Intel_2018$TICKER)] <- "INTC"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Intel_2018$PRC, na.rm = TRUE)
df_Intel_2018$PRC[is.na(df_Intel_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Intel_2018$VOL, na.rm = TRUE)
df_Intel_2018$VOL[is.na(df_Intel_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Intel_2018$BID, na.rm = TRUE)
df_Intel_2018$BID[is.na(df_Intel_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Intel_2018$ASK, na.rm = TRUE)
df_Intel_2018$ASK[is.na(df_Intel_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Intel_2018$OPENPRC, na.rm = TRUE)
df_Intel_2018$OPENPRC[is.na(df_Intel_2018$OPENPRC)] <- OPEN_AVG
df_Intel_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Intel_2018$text <- clean_tweets(df_Intel_2018$text)
df_Intel_2018$text <- lemmatize_words(df_Intel_2018$text)
# Activision 2016: drop columns by position, fill missing values, clean the
# text.
df_Activision_2016 <- df_Activision_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Activision_2016
# Missing tickers become "ATVI".
df_Activision_2016$TICKER[is.na(df_Activision_2016$TICKER)] <- "ATVI"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Activision_2016$PRC, na.rm = TRUE)
df_Activision_2016$PRC[is.na(df_Activision_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Activision_2016$VOL, na.rm = TRUE)
df_Activision_2016$VOL[is.na(df_Activision_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Activision_2016$BID, na.rm = TRUE)
df_Activision_2016$BID[is.na(df_Activision_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Activision_2016$ASK, na.rm = TRUE)
df_Activision_2016$ASK[is.na(df_Activision_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Activision_2016$OPENPRC, na.rm = TRUE)
df_Activision_2016$OPENPRC[is.na(df_Activision_2016$OPENPRC)] <- OPEN_AVG
df_Activision_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Activision_2016$text <- clean_tweets(df_Activision_2016$text)
df_Activision_2016$text <- lemmatize_words(df_Activision_2016$text)
# Activision 2018: drop columns by position, fill missing values, clean the
# text.
df_Activision_2018 <- df_Activision_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Activision_2018
# Missing tickers become "ATVI".
df_Activision_2018$TICKER[is.na(df_Activision_2018$TICKER)] <- "ATVI"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Activision_2018$PRC, na.rm = TRUE)
df_Activision_2018$PRC[is.na(df_Activision_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Activision_2018$VOL, na.rm = TRUE)
df_Activision_2018$VOL[is.na(df_Activision_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Activision_2018$BID, na.rm = TRUE)
df_Activision_2018$BID[is.na(df_Activision_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Activision_2018$ASK, na.rm = TRUE)
df_Activision_2018$ASK[is.na(df_Activision_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Activision_2018$OPENPRC, na.rm = TRUE)
df_Activision_2018$OPENPRC[is.na(df_Activision_2018$OPENPRC)] <- OPEN_AVG
df_Activision_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Activision_2018$text <- clean_tweets(df_Activision_2018$text)
df_Activision_2018$text <- lemmatize_words(df_Activision_2018$text)
# Intuit 2016 (object is spelled "Inuit"): drop columns by position, fill
# missing values, clean the text.
df_Inuit_2016 <- df_Inuit_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Inuit_2016
# Missing tickers become "INTU".
df_Inuit_2016$TICKER[is.na(df_Inuit_2016$TICKER)] <- "INTU"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Inuit_2016$PRC, na.rm = TRUE)
df_Inuit_2016$PRC[is.na(df_Inuit_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Inuit_2016$VOL, na.rm = TRUE)
df_Inuit_2016$VOL[is.na(df_Inuit_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Inuit_2016$BID, na.rm = TRUE)
df_Inuit_2016$BID[is.na(df_Inuit_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Inuit_2016$ASK, na.rm = TRUE)
df_Inuit_2016$ASK[is.na(df_Inuit_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Inuit_2016$OPENPRC, na.rm = TRUE)
df_Inuit_2016$OPENPRC[is.na(df_Inuit_2016$OPENPRC)] <- OPEN_AVG
df_Inuit_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Inuit_2016$text <- clean_tweets(df_Inuit_2016$text)
df_Inuit_2016$text <- lemmatize_words(df_Inuit_2016$text)
# Intuit 2018 (object is spelled "Inuit"): drop columns by position, fill
# missing values, clean the text.
df_Inuit_2018 <- df_Inuit_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Inuit_2018
# Missing tickers become "INTU".
df_Inuit_2018$TICKER[is.na(df_Inuit_2018$TICKER)] <- "INTU"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Inuit_2018$PRC, na.rm = TRUE)
df_Inuit_2018$PRC[is.na(df_Inuit_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Inuit_2018$VOL, na.rm = TRUE)
df_Inuit_2018$VOL[is.na(df_Inuit_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Inuit_2018$BID, na.rm = TRUE)
df_Inuit_2018$BID[is.na(df_Inuit_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Inuit_2018$ASK, na.rm = TRUE)
df_Inuit_2018$ASK[is.na(df_Inuit_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Inuit_2018$OPENPRC, na.rm = TRUE)
df_Inuit_2018$OPENPRC[is.na(df_Inuit_2018$OPENPRC)] <- OPEN_AVG
df_Inuit_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Inuit_2018$text <- clean_tweets(df_Inuit_2018$text)
df_Inuit_2018$text <- lemmatize_words(df_Inuit_2018$text)
# Allergan 2016: drop columns by position, fill missing values, clean the
# text.
df_Allergan_2016 <- df_Allergan_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Allergan_2016
# Missing tickers become "AGN".
df_Allergan_2016$TICKER[is.na(df_Allergan_2016$TICKER)] <- "AGN"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Allergan_2016$PRC, na.rm = TRUE)
df_Allergan_2016$PRC[is.na(df_Allergan_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Allergan_2016$VOL, na.rm = TRUE)
df_Allergan_2016$VOL[is.na(df_Allergan_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Allergan_2016$BID, na.rm = TRUE)
df_Allergan_2016$BID[is.na(df_Allergan_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Allergan_2016$ASK, na.rm = TRUE)
df_Allergan_2016$ASK[is.na(df_Allergan_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Allergan_2016$OPENPRC, na.rm = TRUE)
df_Allergan_2016$OPENPRC[is.na(df_Allergan_2016$OPENPRC)] <- OPEN_AVG
df_Allergan_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Allergan_2016$text <- clean_tweets(df_Allergan_2016$text)
df_Allergan_2016$text <- lemmatize_words(df_Allergan_2016$text)
# Allergan 2018: drop columns by position, fill missing values, clean the
# text.
df_Allergan_2018 <- df_Allergan_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Allergan_2018
# Missing tickers become "AGN".
df_Allergan_2018$TICKER[is.na(df_Allergan_2018$TICKER)] <- "AGN"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Allergan_2018$PRC, na.rm = TRUE)
df_Allergan_2018$PRC[is.na(df_Allergan_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Allergan_2018$VOL, na.rm = TRUE)
df_Allergan_2018$VOL[is.na(df_Allergan_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Allergan_2018$BID, na.rm = TRUE)
df_Allergan_2018$BID[is.na(df_Allergan_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Allergan_2018$ASK, na.rm = TRUE)
df_Allergan_2018$ASK[is.na(df_Allergan_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Allergan_2018$OPENPRC, na.rm = TRUE)
df_Allergan_2018$OPENPRC[is.na(df_Allergan_2018$OPENPRC)] <- OPEN_AVG
df_Allergan_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Allergan_2018$text <- clean_tweets(df_Allergan_2018$text)
df_Allergan_2018$text <- lemmatize_words(df_Allergan_2018$text)
# Humana 2016: drop columns by position, fill missing values, clean the text.
df_Humana_2016 <- df_Humana_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Humana_2016
# Missing tickers become "HUM".
df_Humana_2016$TICKER[is.na(df_Humana_2016$TICKER)] <- "HUM"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Humana_2016$PRC, na.rm = TRUE)
df_Humana_2016$PRC[is.na(df_Humana_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Humana_2016$VOL, na.rm = TRUE)
df_Humana_2016$VOL[is.na(df_Humana_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Humana_2016$BID, na.rm = TRUE)
df_Humana_2016$BID[is.na(df_Humana_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Humana_2016$ASK, na.rm = TRUE)
df_Humana_2016$ASK[is.na(df_Humana_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Humana_2016$OPENPRC, na.rm = TRUE)
df_Humana_2016$OPENPRC[is.na(df_Humana_2016$OPENPRC)] <- OPEN_AVG
df_Humana_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Humana_2016$text <- clean_tweets(df_Humana_2016$text)
df_Humana_2016$text <- lemmatize_words(df_Humana_2016$text)
# Humana 2018: drop columns by position, fill missing values, clean the text.
df_Humana_2018 <- df_Humana_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Humana_2018
# Missing tickers become "HUM".
df_Humana_2018$TICKER[is.na(df_Humana_2018$TICKER)] <- "HUM"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_Humana_2018$PRC, na.rm = TRUE)
df_Humana_2018$PRC[is.na(df_Humana_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Humana_2018$VOL, na.rm = TRUE)
df_Humana_2018$VOL[is.na(df_Humana_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Humana_2018$BID, na.rm = TRUE)
df_Humana_2018$BID[is.na(df_Humana_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Humana_2018$ASK, na.rm = TRUE)
df_Humana_2018$ASK[is.na(df_Humana_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Humana_2018$OPENPRC, na.rm = TRUE)
df_Humana_2018$OPENPRC[is.na(df_Humana_2018$OPENPRC)] <- OPEN_AVG
df_Humana_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Humana_2018$text <- clean_tweets(df_Humana_2018$text)
df_Humana_2018$text <- lemmatize_words(df_Humana_2018$text)
# CB Autodesk 2016: drop columns by position, fill missing values, clean the
# text.
df_CB_autodesk_2016 <- df_CB_autodesk_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_CB_autodesk_2016
# Missing tickers become "ADSK".
df_CB_autodesk_2016$TICKER[is.na(df_CB_autodesk_2016$TICKER)] <- "ADSK"
# Mean imputation: each NA is replaced by the mean of the non-missing values
# of its column.
PRC_AVG <- mean(df_CB_autodesk_2016$PRC, na.rm = TRUE)
df_CB_autodesk_2016$PRC[is.na(df_CB_autodesk_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_CB_autodesk_2016$VOL, na.rm = TRUE)
df_CB_autodesk_2016$VOL[is.na(df_CB_autodesk_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_CB_autodesk_2016$BID, na.rm = TRUE)
df_CB_autodesk_2016$BID[is.na(df_CB_autodesk_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_CB_autodesk_2016$ASK, na.rm = TRUE)
df_CB_autodesk_2016$ASK[is.na(df_CB_autodesk_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_CB_autodesk_2016$OPENPRC, na.rm = TRUE)
df_CB_autodesk_2016$OPENPRC[is.na(df_CB_autodesk_2016$OPENPRC)] <- OPEN_AVG
df_CB_autodesk_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_CB_autodesk_2016$text <- clean_tweets(df_CB_autodesk_2016$text)
df_CB_autodesk_2016$text <- lemmatize_words(df_CB_autodesk_2016$text)
#Remove columns
df_CB_autodesk_2018 <- df_CB_autodesk_2018[,-c(6,8,9,10,15,17,18,19,20,21)]
df_CB_autodesk_2018
#Ticker
df_CB_autodesk_2018$TICKER[is.na(df_CB_autodesk_2018$TICKER)] <- "ADSK"
#PRC
PRC_AVG <- mean(df_CB_autodesk_2018$PRC, na.rm = TRUE)
df_CB_autodesk_2018$PRC[is.na(df_CB_autodesk_2018$PRC)] <- PRC_AVG
#VOL
VOL_AVG <- mean(df_CB_autodesk_2018$VOL, na.rm = TRUE)
df_CB_autodesk_2018$VOL[is.na(df_CB_autodesk_2018$VOL)] <- VOL_AVG
#BID
BID_AVG <- BID_AVG <- mean(df_CB_autodesk_2018$BID, na.rm = TRUE)
df_CB_autodesk_2018$BID[is.na(df_CB_autodesk_2018$BID)] <- BID_AVG
#ASK
ASK_AVG <- ASK_AVG <- mean(df_CB_autodesk_2018$ASK, na.rm = TRUE)
df_CB_autodesk_2018$ASK[is.na(df_CB_autodesk_2018$ASK)] <- ASK_AVG
#OPENPRC
OPEN_AVG <- OPEN_AVG <- mean(df_CB_autodesk_2018$OPENPRC, na.rm = TRUE)
df_CB_autodesk_2018$OPENPRC[is.na(df_CB_autodesk_2018$OPENPRC)] <- OPEN_AVG
df_CB_autodesk_2018
df_CB_autodesk_2018$text <- clean_tweets(df_CB_autodesk_2018$text)
df_CB_autodesk_2018$text <- lemmatize_words(df_CB_autodesk_2018$text)
# Equinox: 2018 dataset (ticker EQIX) ----
# NOTE(review): EQIX is presumably Equinix; confirm the "equinox" spelling of
# the dataset name is intentional.
# Drop columns by position (6, 8-10, 15, 17-21) and preview.
df_equinox_2018 <- df_equinox_2018[, -c(6, 8:10, 15, 17:21)]
df_equinox_2018
# Label rows whose ticker is missing.
df_equinox_2018$TICKER[is.na(df_equinox_2018$TICKER)] <- "EQIX"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_equinox_2018[[market_col]]
  df_equinox_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_equinox_2018
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_equinox_2018$text <- clean_tweets(df_equinox_2018$text)
df_equinox_2018$text <- lemmatize_words(df_equinox_2018$text)
# Cisco: 2016 and 2018 datasets (ticker CSCO) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_cisco_2016 <- df_cisco_2016[, -c(6, 8:10, 15, 17:21)]
df_cisco_2016
# Label rows whose ticker is missing.
df_cisco_2016$TICKER[is.na(df_cisco_2016$TICKER)] <- "CSCO"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_cisco_2016[[market_col]]
  df_cisco_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_cisco_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_cisco_2016$text <- clean_tweets(df_cisco_2016$text)
df_cisco_2016$text <- lemmatize_words(df_cisco_2016$text)

# 2018: same steps as above.
df_cisco_2018 <- df_cisco_2018[, -c(6, 8:10, 15, 17:21)]
df_cisco_2018
df_cisco_2018$TICKER[is.na(df_cisco_2018$TICKER)] <- "CSCO"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_cisco_2018[[market_col]]
  df_cisco_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_cisco_2018
df_cisco_2018$text <- clean_tweets(df_cisco_2018$text)
df_cisco_2018$text <- lemmatize_words(df_cisco_2018$text)
# EBAY: 2016 and 2018 datasets (ticker EBAY) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_EBAY_2016 <- df_EBAY_2016[, -c(6, 8:10, 15, 17:21)]
df_EBAY_2016
# Label rows whose ticker is missing.
df_EBAY_2016$TICKER[is.na(df_EBAY_2016$TICKER)] <- "EBAY"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_EBAY_2016[[market_col]]
  df_EBAY_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_EBAY_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_EBAY_2016$text <- clean_tweets(df_EBAY_2016$text)
df_EBAY_2016$text <- lemmatize_words(df_EBAY_2016$text)

# 2018: same steps as above.
df_EBAY_2018 <- df_EBAY_2018[, -c(6, 8:10, 15, 17:21)]
df_EBAY_2018
df_EBAY_2018$TICKER[is.na(df_EBAY_2018$TICKER)] <- "EBAY"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_EBAY_2018[[market_col]]
  df_EBAY_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_EBAY_2018
df_EBAY_2018$text <- clean_tweets(df_EBAY_2018$text)
df_EBAY_2018$text <- lemmatize_words(df_EBAY_2018$text)
# Davita: 2016 and 2017 datasets (ticker DVA) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Davita_2016 <- df_Davita_2016[, -c(6, 8:10, 15, 17:21)]
df_Davita_2016
# Label rows whose ticker is missing.
df_Davita_2016$TICKER[is.na(df_Davita_2016$TICKER)] <- "DVA"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Davita_2016[[market_col]]
  df_Davita_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Davita_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Davita_2016$text <- clean_tweets(df_Davita_2016$text)
df_Davita_2016$text <- lemmatize_words(df_Davita_2016$text)

# 2017: same steps as above.
df_Davita_2017 <- df_Davita_2017[, -c(6, 8:10, 15, 17:21)]
df_Davita_2017
df_Davita_2017$TICKER[is.na(df_Davita_2017$TICKER)] <- "DVA"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Davita_2017[[market_col]]
  df_Davita_2017[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Davita_2017
df_Davita_2017$text <- clean_tweets(df_Davita_2017$text)
df_Davita_2017$text <- lemmatize_words(df_Davita_2017$text)
# Illumina: 2016 and 2018 datasets (ticker ILMN) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Illumina_2016 <- df_Illumina_2016[, -c(6, 8:10, 15, 17:21)]
df_Illumina_2016
# Label rows whose ticker is missing.
df_Illumina_2016$TICKER[is.na(df_Illumina_2016$TICKER)] <- "ILMN"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Illumina_2016[[market_col]]
  df_Illumina_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Illumina_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Illumina_2016$text <- clean_tweets(df_Illumina_2016$text)
df_Illumina_2016$text <- lemmatize_words(df_Illumina_2016$text)

# 2018: same steps as above.
df_Illumina_2018 <- df_Illumina_2018[, -c(6, 8:10, 15, 17:21)]
df_Illumina_2018
df_Illumina_2018$TICKER[is.na(df_Illumina_2018$TICKER)] <- "ILMN"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Illumina_2018[[market_col]]
  df_Illumina_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Illumina_2018
df_Illumina_2018$text <- clean_tweets(df_Illumina_2018$text)
df_Illumina_2018$text <- lemmatize_words(df_Illumina_2018$text)
# Homedepot: 2013 dataset (ticker HD) ----
# Drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Homedepot_2013 <- df_Homedepot_2013[, -c(6, 8:10, 15, 17:21)]
df_Homedepot_2013
# Label rows whose ticker is missing.
df_Homedepot_2013$TICKER[is.na(df_Homedepot_2013$TICKER)] <- "HD"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Homedepot_2013[[market_col]]
  df_Homedepot_2013[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Homedepot_2013
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Homedepot_2013$text <- clean_tweets(df_Homedepot_2013$text)
df_Homedepot_2013$text <- lemmatize_words(df_Homedepot_2013$text)
# Southwest Airlines: 2016 and 2018 datasets (ticker LUV) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Southwest_2016 <- df_Southwest_2016[, -c(6, 8:10, 15, 17:21)]
df_Southwest_2016
# Label rows whose ticker is missing.
df_Southwest_2016$TICKER[is.na(df_Southwest_2016$TICKER)] <- "LUV"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Southwest_2016[[market_col]]
  df_Southwest_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Southwest_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Southwest_2016$text <- clean_tweets(df_Southwest_2016$text)
df_Southwest_2016$text <- lemmatize_words(df_Southwest_2016$text)

# 2018: same steps as above.
df_Southwest_2018 <- df_Southwest_2018[, -c(6, 8:10, 15, 17:21)]
df_Southwest_2018
df_Southwest_2018$TICKER[is.na(df_Southwest_2018$TICKER)] <- "LUV"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Southwest_2018[[market_col]]
  df_Southwest_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Southwest_2018
df_Southwest_2018$text <- clean_tweets(df_Southwest_2018$text)
df_Southwest_2018$text <- lemmatize_words(df_Southwest_2018$text)
# FIS: 2016 and 2018 datasets (ticker FIS) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_FIS_2016 <- df_FIS_2016[, -c(6, 8:10, 15, 17:21)]
df_FIS_2016
# Label rows whose ticker is missing.
df_FIS_2016$TICKER[is.na(df_FIS_2016$TICKER)] <- "FIS"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_FIS_2016[[market_col]]
  df_FIS_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_FIS_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_FIS_2016$text <- clean_tweets(df_FIS_2016$text)
df_FIS_2016$text <- lemmatize_words(df_FIS_2016$text)

# 2018: same steps as above.
df_FIS_2018 <- df_FIS_2018[, -c(6, 8:10, 15, 17:21)]
df_FIS_2018
df_FIS_2018$TICKER[is.na(df_FIS_2018$TICKER)] <- "FIS"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_FIS_2018[[market_col]]
  df_FIS_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_FIS_2018
df_FIS_2018$text <- clean_tweets(df_FIS_2018$text)
df_FIS_2018$text <- lemmatize_words(df_FIS_2018$text)
# Leucadia: 2016 and 2018 datasets (ticker LUK) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Leucadia_2016 <- df_Leucadia_2016[, -c(6, 8:10, 15, 17:21)]
df_Leucadia_2016
# Label rows whose ticker is missing.
df_Leucadia_2016$TICKER[is.na(df_Leucadia_2016$TICKER)] <- "LUK"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Leucadia_2016[[market_col]]
  df_Leucadia_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Leucadia_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Leucadia_2016$text <- clean_tweets(df_Leucadia_2016$text)
df_Leucadia_2016$text <- lemmatize_words(df_Leucadia_2016$text)

# 2018: same steps as above.
df_Leucadia_2018 <- df_Leucadia_2018[, -c(6, 8:10, 15, 17:21)]
df_Leucadia_2018
df_Leucadia_2018$TICKER[is.na(df_Leucadia_2018$TICKER)] <- "LUK"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Leucadia_2018[[market_col]]
  df_Leucadia_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Leucadia_2018
df_Leucadia_2018$text <- clean_tweets(df_Leucadia_2018$text)
df_Leucadia_2018$text <- lemmatize_words(df_Leucadia_2018$text)
# Verizon: 2018 dataset (ticker VZ) ----
# Drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Verizon_2018 <- df_Verizon_2018[, -c(6, 8:10, 15, 17:21)]
df_Verizon_2018
# Label rows whose ticker is missing.
df_Verizon_2018$TICKER[is.na(df_Verizon_2018$TICKER)] <- "VZ"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Verizon_2018[[market_col]]
  df_Verizon_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Verizon_2018
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Verizon_2018$text <- clean_tweets(df_Verizon_2018$text)
df_Verizon_2018$text <- lemmatize_words(df_Verizon_2018$text)
# Western Union: 2016 and 2018 datasets (ticker WU) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_WU_2016 <- df_WU_2016[, -c(6, 8:10, 15, 17:21)]
df_WU_2016
# Label rows whose ticker is missing.
df_WU_2016$TICKER[is.na(df_WU_2016$TICKER)] <- "WU"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_WU_2016[[market_col]]
  df_WU_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_WU_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_WU_2016$text <- clean_tweets(df_WU_2016$text)
df_WU_2016$text <- lemmatize_words(df_WU_2016$text)

# 2018: same steps as above.
df_WU_2018 <- df_WU_2018[, -c(6, 8:10, 15, 17:21)]
df_WU_2018
df_WU_2018$TICKER[is.na(df_WU_2018$TICKER)] <- "WU"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_WU_2018[[market_col]]
  df_WU_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_WU_2018
df_WU_2018$text <- clean_tweets(df_WU_2018$text)
df_WU_2018$text <- lemmatize_words(df_WU_2018$text)
# Red Hat: 2016 and 2018 datasets (ticker RHT) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_RedHat_2016 <- df_RedHat_2016[, -c(6, 8:10, 15, 17:21)]
df_RedHat_2016
# Label rows whose ticker is missing.
df_RedHat_2016$TICKER[is.na(df_RedHat_2016$TICKER)] <- "RHT"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_RedHat_2016[[market_col]]
  df_RedHat_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_RedHat_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_RedHat_2016$text <- clean_tweets(df_RedHat_2016$text)
df_RedHat_2016$text <- lemmatize_words(df_RedHat_2016$text)

# 2018: same steps as above.
df_RedHat_2018 <- df_RedHat_2018[, -c(6, 8:10, 15, 17:21)]
df_RedHat_2018
df_RedHat_2018$TICKER[is.na(df_RedHat_2018$TICKER)] <- "RHT"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_RedHat_2018[[market_col]]
  df_RedHat_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_RedHat_2018
df_RedHat_2018$text <- clean_tweets(df_RedHat_2018$text)
df_RedHat_2018$text <- lemmatize_words(df_RedHat_2018$text)
# Amazon: 2016 and 2018 datasets (ticker AMZN) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_AMZN_2016 <- df_AMZN_2016[, -c(6, 8:10, 15, 17:21)]
df_AMZN_2016
# Label rows whose ticker is missing.
df_AMZN_2016$TICKER[is.na(df_AMZN_2016$TICKER)] <- "AMZN"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_AMZN_2016[[market_col]]
  df_AMZN_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_AMZN_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_AMZN_2016$text <- clean_tweets(df_AMZN_2016$text)
df_AMZN_2016$text <- lemmatize_words(df_AMZN_2016$text)

# 2018: same steps as above.
df_AMZN_2018 <- df_AMZN_2018[, -c(6, 8:10, 15, 17:21)]
df_AMZN_2018
df_AMZN_2018$TICKER[is.na(df_AMZN_2018$TICKER)] <- "AMZN"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_AMZN_2018[[market_col]]
  df_AMZN_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_AMZN_2018
df_AMZN_2018$text <- clean_tweets(df_AMZN_2018$text)
df_AMZN_2018$text <- lemmatize_words(df_AMZN_2018$text)
# GE: 2016 and 2017 datasets (ticker GE) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_GE_2016 <- df_GE_2016[, -c(6, 8:10, 15, 17:21)]
df_GE_2016
# Label rows whose ticker is missing.
df_GE_2016$TICKER[is.na(df_GE_2016$TICKER)] <- "GE"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_GE_2016[[market_col]]
  df_GE_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_GE_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_GE_2016$text <- clean_tweets(df_GE_2016$text)
df_GE_2016$text <- lemmatize_words(df_GE_2016$text)

# 2017: same steps as above.
df_GE_2017 <- df_GE_2017[, -c(6, 8:10, 15, 17:21)]
df_GE_2017
df_GE_2017$TICKER[is.na(df_GE_2017$TICKER)] <- "GE"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_GE_2017[[market_col]]
  df_GE_2017[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_GE_2017
df_GE_2017$text <- clean_tweets(df_GE_2017$text)
df_GE_2017$text <- lemmatize_words(df_GE_2017$text)
# Fiserv: 2016 and 2018 datasets (ticker FISV) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Fiserv_2016 <- df_Fiserv_2016[, -c(6, 8:10, 15, 17:21)]
df_Fiserv_2016
# Label rows whose ticker is missing.
df_Fiserv_2016$TICKER[is.na(df_Fiserv_2016$TICKER)] <- "FISV"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Fiserv_2016[[market_col]]
  df_Fiserv_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Fiserv_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Fiserv_2016$text <- clean_tweets(df_Fiserv_2016$text)
df_Fiserv_2016$text <- lemmatize_words(df_Fiserv_2016$text)

# 2018: same steps as above.
df_Fiserv_2018 <- df_Fiserv_2018[, -c(6, 8:10, 15, 17:21)]
df_Fiserv_2018
df_Fiserv_2018$TICKER[is.na(df_Fiserv_2018$TICKER)] <- "FISV"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Fiserv_2018[[market_col]]
  df_Fiserv_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Fiserv_2018
df_Fiserv_2018$text <- clean_tweets(df_Fiserv_2018$text)
df_Fiserv_2018$text <- lemmatize_words(df_Fiserv_2018$text)
# Waste Management: 2018 dataset (ticker WM) ----
# Drop columns by position (6, 8-10, 15, 17-21) and preview.
df_WM_2018 <- df_WM_2018[, -c(6, 8:10, 15, 17:21)]
df_WM_2018
# Label rows whose ticker is missing.
df_WM_2018$TICKER[is.na(df_WM_2018$TICKER)] <- "WM"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_WM_2018[[market_col]]
  df_WM_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_WM_2018
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_WM_2018$text <- clean_tweets(df_WM_2018$text)
df_WM_2018$text <- lemmatize_words(df_WM_2018$text)
# WillsTower: 2016 and 2018 datasets (ticker WLTW) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_Wills_2016 <- df_Wills_2016[, -c(6, 8:10, 15, 17:21)]
df_Wills_2016
# Label rows whose ticker is missing.
df_Wills_2016$TICKER[is.na(df_Wills_2016$TICKER)] <- "WLTW"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Wills_2016[[market_col]]
  df_Wills_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Wills_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_Wills_2016$text <- clean_tweets(df_Wills_2016$text)
df_Wills_2016$text <- lemmatize_words(df_Wills_2016$text)

# 2018: same steps as above.
df_Wills_2018 <- df_Wills_2018[, -c(6, 8:10, 15, 17:21)]
df_Wills_2018
df_Wills_2018$TICKER[is.na(df_Wills_2018$TICKER)] <- "WLTW"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_Wills_2018[[market_col]]
  df_Wills_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_Wills_2018
df_Wills_2018$text <- clean_tweets(df_Wills_2018$text)
df_Wills_2018$text <- lemmatize_words(df_Wills_2018$text)
# Tripadvisor: 2016 and 2018 datasets (ticker TRIP) ----
# 2016: drop columns by position (6, 8-10, 15, 17-21) and preview.
df_tripadvisor_2016 <- df_tripadvisor_2016[, -c(6, 8:10, 15, 17:21)]
df_tripadvisor_2016
# Label rows whose ticker is missing.
df_tripadvisor_2016$TICKER[is.na(df_tripadvisor_2016$TICKER)] <- "TRIP"
# Mean-impute missing market values. One loop replaces five copy-pasted
# stanzas, so no shared *_AVG global left over from another dataset can be
# used by mistake.
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_tripadvisor_2016[[market_col]]
  df_tripadvisor_2016[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_tripadvisor_2016
# Normalise the tweet text.
# NOTE(review): if lemmatize_words() is textstem's, it expects single tokens;
# whole tweets may need lemmatize_strings() -- confirm.
df_tripadvisor_2016$text <- clean_tweets(df_tripadvisor_2016$text)
df_tripadvisor_2016$text <- lemmatize_words(df_tripadvisor_2016$text)

# 2018: same steps as above.
df_tripadvisor_2018 <- df_tripadvisor_2018[, -c(6, 8:10, 15, 17:21)]
df_tripadvisor_2018
df_tripadvisor_2018$TICKER[is.na(df_tripadvisor_2018$TICKER)] <- "TRIP"
for (market_col in c("PRC", "VOL", "BID", "ASK", "OPENPRC")) {
  observed <- df_tripadvisor_2018[[market_col]]
  df_tripadvisor_2018[[market_col]][is.na(observed)] <-
    mean(observed, na.rm = TRUE)
}
df_tripadvisor_2018
df_tripadvisor_2018$text <- clean_tweets(df_tripadvisor_2018$text)
df_tripadvisor_2018$text <- lemmatize_words(df_tripadvisor_2018$text)
#DavitaKent
#Remove columns
df_DavitaK_2016 <- df_DavitaK_2016[,-c(6,8,9,10,15,17,18,19,20,21)]
df_DavitaK_2016
#Ticker
df_DavitaK_2016$TICKER[is.na(df_DavitaK_2016$TICKER)] <- "DVA"
#PRC
PRC_AVG <- mean(df_DavitaK_2016$PRC, na.rm = TRUE)
df_DavitaK_2016$PRC[is.na(df_DavitaK_2016$PRC)] <- PRC_AVG
#VOL
VOL_AVG <- mean(df_DavitaK_2016$VOL, na.rm = TRUE)
df_DavitaK_2016$VOL[is.na(df_DavitaK_2016$VOL)] <- VOL_AVG
#BID
BID_AVG <- BID_AVG <- mean(df_DavitaK_2016$BID, na.rm = TRUE)
df_DavitaK_2016$BID[is.na(df_DavitaK_2016$BID)] <- BID_AVG
#ASK
ASK_AVG <- ASK_AVG <- mean(df_DavitaK_2016$ASK, na.rm = TRUE)
df_DavitaK_2016$ASK[is.na(df_DavitaK_2016$ASK)] <- ASK_AVG
#OPENPRC
OPEN_AVG <- OPEN_AVG <- mean(df_DavitaK_2016$OPENPRC, na.rm = TRUE)
df_DavitaK_2016$OPENPRC[is.na(df_DavitaK_2016$OPENPRC)] <- OPEN_AVG
df_DavitaK_2016
df_DavitaK_2016$text <- clean_tweets(df_DavitaK_2016$text)
df_DavitaK_2016$text <- lemmatize_words(df_DavitaK_2016$text)
# ---- DavitaKent 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_DavitaK_2018 <- df_DavitaK_2018[, -c(6, 8:10, 15, 17:21)]
df_DavitaK_2018
# Rows whose TICKER is NA are labelled "DVA".
df_DavitaK_2018$TICKER <- replace(
  df_DavitaK_2018$TICKER, is.na(df_DavitaK_2018$TICKER), "DVA"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_DavitaK_2018$PRC, na.rm = TRUE)
df_DavitaK_2018$PRC <- replace(
  df_DavitaK_2018$PRC, is.na(df_DavitaK_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_DavitaK_2018$VOL, na.rm = TRUE)
df_DavitaK_2018$VOL <- replace(
  df_DavitaK_2018$VOL, is.na(df_DavitaK_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_DavitaK_2018$BID, na.rm = TRUE)
df_DavitaK_2018$BID <- replace(
  df_DavitaK_2018$BID, is.na(df_DavitaK_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_DavitaK_2018$ASK, na.rm = TRUE)
df_DavitaK_2018$ASK <- replace(
  df_DavitaK_2018$ASK, is.na(df_DavitaK_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_DavitaK_2018$OPENPRC, na.rm = TRUE)
df_DavitaK_2018$OPENPRC <- replace(
  df_DavitaK_2018$OPENPRC, is.na(df_DavitaK_2018$OPENPRC), OPEN_AVG
)
df_DavitaK_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_DavitaK_2018$text <- clean_tweets(df_DavitaK_2018$text)
df_DavitaK_2018$text <- lemmatize_words(df_DavitaK_2018$text)
# ---- Starbucks 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Starbucks_2018 <- df_Starbucks_2018[, -c(6, 8:10, 15, 17:21)]
df_Starbucks_2018
# Rows whose TICKER is NA are labelled "SBUX".
df_Starbucks_2018$TICKER <- replace(
  df_Starbucks_2018$TICKER, is.na(df_Starbucks_2018$TICKER), "SBUX"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Starbucks_2018$PRC, na.rm = TRUE)
df_Starbucks_2018$PRC <- replace(
  df_Starbucks_2018$PRC, is.na(df_Starbucks_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Starbucks_2018$VOL, na.rm = TRUE)
df_Starbucks_2018$VOL <- replace(
  df_Starbucks_2018$VOL, is.na(df_Starbucks_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Starbucks_2018$BID, na.rm = TRUE)
df_Starbucks_2018$BID <- replace(
  df_Starbucks_2018$BID, is.na(df_Starbucks_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Starbucks_2018$ASK, na.rm = TRUE)
df_Starbucks_2018$ASK <- replace(
  df_Starbucks_2018$ASK, is.na(df_Starbucks_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Starbucks_2018$OPENPRC, na.rm = TRUE)
df_Starbucks_2018$OPENPRC <- replace(
  df_Starbucks_2018$OPENPRC, is.na(df_Starbucks_2018$OPENPRC), OPEN_AVG
)
df_Starbucks_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Starbucks_2018$text <- clean_tweets(df_Starbucks_2018$text)
df_Starbucks_2018$text <- lemmatize_words(df_Starbucks_2018$text)
# ---- McCormick 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_McCormick_2018 <- df_McCormick_2018[, -c(6, 8:10, 15, 17:21)]
df_McCormick_2018
# Rows whose TICKER is NA are labelled "MKC".
df_McCormick_2018$TICKER <- replace(
  df_McCormick_2018$TICKER, is.na(df_McCormick_2018$TICKER), "MKC"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_McCormick_2018$PRC, na.rm = TRUE)
df_McCormick_2018$PRC <- replace(
  df_McCormick_2018$PRC, is.na(df_McCormick_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_McCormick_2018$VOL, na.rm = TRUE)
df_McCormick_2018$VOL <- replace(
  df_McCormick_2018$VOL, is.na(df_McCormick_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_McCormick_2018$BID, na.rm = TRUE)
df_McCormick_2018$BID <- replace(
  df_McCormick_2018$BID, is.na(df_McCormick_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_McCormick_2018$ASK, na.rm = TRUE)
df_McCormick_2018$ASK <- replace(
  df_McCormick_2018$ASK, is.na(df_McCormick_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_McCormick_2018$OPENPRC, na.rm = TRUE)
df_McCormick_2018$OPENPRC <- replace(
  df_McCormick_2018$OPENPRC, is.na(df_McCormick_2018$OPENPRC), OPEN_AVG
)
df_McCormick_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_McCormick_2018$text <- clean_tweets(df_McCormick_2018$text)
df_McCormick_2018$text <- lemmatize_words(df_McCormick_2018$text)
# ---- IHS Markit 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_IHS_2018 <- df_IHS_2018[, -c(6, 8:10, 15, 17:21)]
df_IHS_2018
# Rows whose TICKER is NA are labelled "TCX".
# NOTE(review): confirm "TCX" is the intended symbol for this frame by
# comparing it with the non-missing TICKER values.
df_IHS_2018$TICKER <- replace(
  df_IHS_2018$TICKER, is.na(df_IHS_2018$TICKER), "TCX"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_IHS_2018$PRC, na.rm = TRUE)
df_IHS_2018$PRC <- replace(
  df_IHS_2018$PRC, is.na(df_IHS_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_IHS_2018$VOL, na.rm = TRUE)
df_IHS_2018$VOL <- replace(
  df_IHS_2018$VOL, is.na(df_IHS_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_IHS_2018$BID, na.rm = TRUE)
df_IHS_2018$BID <- replace(
  df_IHS_2018$BID, is.na(df_IHS_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_IHS_2018$ASK, na.rm = TRUE)
df_IHS_2018$ASK <- replace(
  df_IHS_2018$ASK, is.na(df_IHS_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_IHS_2018$OPENPRC, na.rm = TRUE)
df_IHS_2018$OPENPRC <- replace(
  df_IHS_2018$OPENPRC, is.na(df_IHS_2018$OPENPRC), OPEN_AVG
)
df_IHS_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_IHS_2018$text <- clean_tweets(df_IHS_2018$text)
df_IHS_2018$text <- lemmatize_words(df_IHS_2018$text)
# ---- AMD 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_AMD_2018 <- df_AMD_2018[, -c(6, 8:10, 15, 17:21)]
df_AMD_2018
# Rows whose TICKER is NA are labelled "AMD".
df_AMD_2018$TICKER <- replace(
  df_AMD_2018$TICKER, is.na(df_AMD_2018$TICKER), "AMD"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_AMD_2018$PRC, na.rm = TRUE)
df_AMD_2018$PRC <- replace(
  df_AMD_2018$PRC, is.na(df_AMD_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_AMD_2018$VOL, na.rm = TRUE)
df_AMD_2018$VOL <- replace(
  df_AMD_2018$VOL, is.na(df_AMD_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_AMD_2018$BID, na.rm = TRUE)
df_AMD_2018$BID <- replace(
  df_AMD_2018$BID, is.na(df_AMD_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_AMD_2018$ASK, na.rm = TRUE)
df_AMD_2018$ASK <- replace(
  df_AMD_2018$ASK, is.na(df_AMD_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_AMD_2018$OPENPRC, na.rm = TRUE)
df_AMD_2018$OPENPRC <- replace(
  df_AMD_2018$OPENPRC, is.na(df_AMD_2018$OPENPRC), OPEN_AVG
)
df_AMD_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_AMD_2018$text <- clean_tweets(df_AMD_2018$text)
df_AMD_2018$text <- lemmatize_words(df_AMD_2018$text)
# ---- ResMed 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_ResMed_2016 <- df_ResMed_2016[, -c(6, 8:10, 15, 17:21)]
df_ResMed_2016
# Rows whose TICKER is NA are labelled "RMD".
df_ResMed_2016$TICKER <- replace(
  df_ResMed_2016$TICKER, is.na(df_ResMed_2016$TICKER), "RMD"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_ResMed_2016$PRC, na.rm = TRUE)
df_ResMed_2016$PRC <- replace(
  df_ResMed_2016$PRC, is.na(df_ResMed_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_ResMed_2016$VOL, na.rm = TRUE)
df_ResMed_2016$VOL <- replace(
  df_ResMed_2016$VOL, is.na(df_ResMed_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_ResMed_2016$BID, na.rm = TRUE)
df_ResMed_2016$BID <- replace(
  df_ResMed_2016$BID, is.na(df_ResMed_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_ResMed_2016$ASK, na.rm = TRUE)
df_ResMed_2016$ASK <- replace(
  df_ResMed_2016$ASK, is.na(df_ResMed_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_ResMed_2016$OPENPRC, na.rm = TRUE)
df_ResMed_2016$OPENPRC <- replace(
  df_ResMed_2016$OPENPRC, is.na(df_ResMed_2016$OPENPRC), OPEN_AVG
)
df_ResMed_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_ResMed_2016$text <- clean_tweets(df_ResMed_2016$text)
df_ResMed_2016$text <- lemmatize_words(df_ResMed_2016$text)
# ---- ResMed 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_ResMed_2018 <- df_ResMed_2018[, -c(6, 8:10, 15, 17:21)]
df_ResMed_2018
# Rows whose TICKER is NA are labelled "RMD".
df_ResMed_2018$TICKER <- replace(
  df_ResMed_2018$TICKER, is.na(df_ResMed_2018$TICKER), "RMD"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_ResMed_2018$PRC, na.rm = TRUE)
df_ResMed_2018$PRC <- replace(
  df_ResMed_2018$PRC, is.na(df_ResMed_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_ResMed_2018$VOL, na.rm = TRUE)
df_ResMed_2018$VOL <- replace(
  df_ResMed_2018$VOL, is.na(df_ResMed_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_ResMed_2018$BID, na.rm = TRUE)
df_ResMed_2018$BID <- replace(
  df_ResMed_2018$BID, is.na(df_ResMed_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_ResMed_2018$ASK, na.rm = TRUE)
df_ResMed_2018$ASK <- replace(
  df_ResMed_2018$ASK, is.na(df_ResMed_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_ResMed_2018$OPENPRC, na.rm = TRUE)
df_ResMed_2018$OPENPRC <- replace(
  df_ResMed_2018$OPENPRC, is.na(df_ResMed_2018$OPENPRC), OPEN_AVG
)
df_ResMed_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_ResMed_2018$text <- clean_tweets(df_ResMed_2018$text)
df_ResMed_2018$text <- lemmatize_words(df_ResMed_2018$text)
# ---- CA 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_CA_2016 <- df_CA_2016[, -c(6, 8:10, 15, 17:21)]
df_CA_2016
# Rows whose TICKER is NA are labelled "CA".
df_CA_2016$TICKER <- replace(
  df_CA_2016$TICKER, is.na(df_CA_2016$TICKER), "CA"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_CA_2016$PRC, na.rm = TRUE)
df_CA_2016$PRC <- replace(
  df_CA_2016$PRC, is.na(df_CA_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_CA_2016$VOL, na.rm = TRUE)
df_CA_2016$VOL <- replace(
  df_CA_2016$VOL, is.na(df_CA_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_CA_2016$BID, na.rm = TRUE)
df_CA_2016$BID <- replace(
  df_CA_2016$BID, is.na(df_CA_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_CA_2016$ASK, na.rm = TRUE)
df_CA_2016$ASK <- replace(
  df_CA_2016$ASK, is.na(df_CA_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_CA_2016$OPENPRC, na.rm = TRUE)
df_CA_2016$OPENPRC <- replace(
  df_CA_2016$OPENPRC, is.na(df_CA_2016$OPENPRC), OPEN_AVG
)
df_CA_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_CA_2016$text <- clean_tweets(df_CA_2016$text)
df_CA_2016$text <- lemmatize_words(df_CA_2016$text)
# ---- CA 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_CA_2018 <- df_CA_2018[, -c(6, 8:10, 15, 17:21)]
df_CA_2018
# Rows whose TICKER is NA are labelled "CA".
df_CA_2018$TICKER <- replace(
  df_CA_2018$TICKER, is.na(df_CA_2018$TICKER), "CA"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_CA_2018$PRC, na.rm = TRUE)
df_CA_2018$PRC <- replace(
  df_CA_2018$PRC, is.na(df_CA_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_CA_2018$VOL, na.rm = TRUE)
df_CA_2018$VOL <- replace(
  df_CA_2018$VOL, is.na(df_CA_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_CA_2018$BID, na.rm = TRUE)
df_CA_2018$BID <- replace(
  df_CA_2018$BID, is.na(df_CA_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_CA_2018$ASK, na.rm = TRUE)
df_CA_2018$ASK <- replace(
  df_CA_2018$ASK, is.na(df_CA_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_CA_2018$OPENPRC, na.rm = TRUE)
df_CA_2018$OPENPRC <- replace(
  df_CA_2018$OPENPRC, is.na(df_CA_2018$OPENPRC), OPEN_AVG
)
df_CA_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_CA_2018$text <- clean_tweets(df_CA_2018$text)
df_CA_2018$text <- lemmatize_words(df_CA_2018$text)
# ---- GM 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_GM_2016 <- df_GM_2016[, -c(6, 8:10, 15, 17:21)]
df_GM_2016
# Rows whose TICKER is NA are labelled "GM".
df_GM_2016$TICKER <- replace(
  df_GM_2016$TICKER, is.na(df_GM_2016$TICKER), "GM"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_GM_2016$PRC, na.rm = TRUE)
df_GM_2016$PRC <- replace(
  df_GM_2016$PRC, is.na(df_GM_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_GM_2016$VOL, na.rm = TRUE)
df_GM_2016$VOL <- replace(
  df_GM_2016$VOL, is.na(df_GM_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_GM_2016$BID, na.rm = TRUE)
df_GM_2016$BID <- replace(
  df_GM_2016$BID, is.na(df_GM_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_GM_2016$ASK, na.rm = TRUE)
df_GM_2016$ASK <- replace(
  df_GM_2016$ASK, is.na(df_GM_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_GM_2016$OPENPRC, na.rm = TRUE)
df_GM_2016$OPENPRC <- replace(
  df_GM_2016$OPENPRC, is.na(df_GM_2016$OPENPRC), OPEN_AVG
)
df_GM_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_GM_2016$text <- clean_tweets(df_GM_2016$text)
df_GM_2016$text <- lemmatize_words(df_GM_2016$text)
# ---- GM 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_GM_2018 <- df_GM_2018[, -c(6, 8:10, 15, 17:21)]
df_GM_2018
# Rows whose TICKER is NA are labelled "GM".
df_GM_2018$TICKER <- replace(
  df_GM_2018$TICKER, is.na(df_GM_2018$TICKER), "GM"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_GM_2018$PRC, na.rm = TRUE)
df_GM_2018$PRC <- replace(
  df_GM_2018$PRC, is.na(df_GM_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_GM_2018$VOL, na.rm = TRUE)
df_GM_2018$VOL <- replace(
  df_GM_2018$VOL, is.na(df_GM_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_GM_2018$BID, na.rm = TRUE)
df_GM_2018$BID <- replace(
  df_GM_2018$BID, is.na(df_GM_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_GM_2018$ASK, na.rm = TRUE)
df_GM_2018$ASK <- replace(
  df_GM_2018$ASK, is.na(df_GM_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_GM_2018$OPENPRC, na.rm = TRUE)
df_GM_2018$OPENPRC <- replace(
  df_GM_2018$OPENPRC, is.na(df_GM_2018$OPENPRC), OPEN_AVG
)
df_GM_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_GM_2018$text <- clean_tweets(df_GM_2018$text)
df_GM_2018$text <- lemmatize_words(df_GM_2018$text)
# ---- Aetna 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Aetna_2016 <- df_Aetna_2016[, -c(6, 8:10, 15, 17:21)]
df_Aetna_2016
# Rows whose TICKER is NA are labelled "AET".
df_Aetna_2016$TICKER <- replace(
  df_Aetna_2016$TICKER, is.na(df_Aetna_2016$TICKER), "AET"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Aetna_2016$PRC, na.rm = TRUE)
df_Aetna_2016$PRC <- replace(
  df_Aetna_2016$PRC, is.na(df_Aetna_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Aetna_2016$VOL, na.rm = TRUE)
df_Aetna_2016$VOL <- replace(
  df_Aetna_2016$VOL, is.na(df_Aetna_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Aetna_2016$BID, na.rm = TRUE)
df_Aetna_2016$BID <- replace(
  df_Aetna_2016$BID, is.na(df_Aetna_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Aetna_2016$ASK, na.rm = TRUE)
df_Aetna_2016$ASK <- replace(
  df_Aetna_2016$ASK, is.na(df_Aetna_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Aetna_2016$OPENPRC, na.rm = TRUE)
df_Aetna_2016$OPENPRC <- replace(
  df_Aetna_2016$OPENPRC, is.na(df_Aetna_2016$OPENPRC), OPEN_AVG
)
df_Aetna_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Aetna_2016$text <- clean_tweets(df_Aetna_2016$text)
df_Aetna_2016$text <- lemmatize_words(df_Aetna_2016$text)
# ---- Aetna 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Aetna_2018 <- df_Aetna_2018[, -c(6, 8:10, 15, 17:21)]
df_Aetna_2018
# Rows whose TICKER is NA are labelled "AET".
df_Aetna_2018$TICKER <- replace(
  df_Aetna_2018$TICKER, is.na(df_Aetna_2018$TICKER), "AET"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Aetna_2018$PRC, na.rm = TRUE)
df_Aetna_2018$PRC <- replace(
  df_Aetna_2018$PRC, is.na(df_Aetna_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Aetna_2018$VOL, na.rm = TRUE)
df_Aetna_2018$VOL <- replace(
  df_Aetna_2018$VOL, is.na(df_Aetna_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Aetna_2018$BID, na.rm = TRUE)
df_Aetna_2018$BID <- replace(
  df_Aetna_2018$BID, is.na(df_Aetna_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Aetna_2018$ASK, na.rm = TRUE)
df_Aetna_2018$ASK <- replace(
  df_Aetna_2018$ASK, is.na(df_Aetna_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Aetna_2018$OPENPRC, na.rm = TRUE)
df_Aetna_2018$OPENPRC <- replace(
  df_Aetna_2018$OPENPRC, is.na(df_Aetna_2018$OPENPRC), OPEN_AVG
)
df_Aetna_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Aetna_2018$text <- clean_tweets(df_Aetna_2018$text)
df_Aetna_2018$text <- lemmatize_words(df_Aetna_2018$text)
# ---- NRG 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_NRG_2016 <- df_NRG_2016[, -c(6, 8:10, 15, 17:21)]
df_NRG_2016
# Rows whose TICKER is NA are labelled "NRG".
df_NRG_2016$TICKER <- replace(
  df_NRG_2016$TICKER, is.na(df_NRG_2016$TICKER), "NRG"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_NRG_2016$PRC, na.rm = TRUE)
df_NRG_2016$PRC <- replace(
  df_NRG_2016$PRC, is.na(df_NRG_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_NRG_2016$VOL, na.rm = TRUE)
df_NRG_2016$VOL <- replace(
  df_NRG_2016$VOL, is.na(df_NRG_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_NRG_2016$BID, na.rm = TRUE)
df_NRG_2016$BID <- replace(
  df_NRG_2016$BID, is.na(df_NRG_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_NRG_2016$ASK, na.rm = TRUE)
df_NRG_2016$ASK <- replace(
  df_NRG_2016$ASK, is.na(df_NRG_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_NRG_2016$OPENPRC, na.rm = TRUE)
df_NRG_2016$OPENPRC <- replace(
  df_NRG_2016$OPENPRC, is.na(df_NRG_2016$OPENPRC), OPEN_AVG
)
df_NRG_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_NRG_2016$text <- clean_tweets(df_NRG_2016$text)
df_NRG_2016$text <- lemmatize_words(df_NRG_2016$text)
# ---- NRG 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_NRG_2018 <- df_NRG_2018[, -c(6, 8:10, 15, 17:21)]
df_NRG_2018
# Rows whose TICKER is NA are labelled "NRG".
df_NRG_2018$TICKER <- replace(
  df_NRG_2018$TICKER, is.na(df_NRG_2018$TICKER), "NRG"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_NRG_2018$PRC, na.rm = TRUE)
df_NRG_2018$PRC <- replace(
  df_NRG_2018$PRC, is.na(df_NRG_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_NRG_2018$VOL, na.rm = TRUE)
df_NRG_2018$VOL <- replace(
  df_NRG_2018$VOL, is.na(df_NRG_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_NRG_2018$BID, na.rm = TRUE)
df_NRG_2018$BID <- replace(
  df_NRG_2018$BID, is.na(df_NRG_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_NRG_2018$ASK, na.rm = TRUE)
df_NRG_2018$ASK <- replace(
  df_NRG_2018$ASK, is.na(df_NRG_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_NRG_2018$OPENPRC, na.rm = TRUE)
df_NRG_2018$OPENPRC <- replace(
  df_NRG_2018$OPENPRC, is.na(df_NRG_2018$OPENPRC), OPEN_AVG
)
df_NRG_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_NRG_2018$text <- clean_tweets(df_NRG_2018$text)
df_NRG_2018$text <- lemmatize_words(df_NRG_2018$text)
# ---- Medtronic 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Medtronic_2016 <- df_Medtronic_2016[, -c(6, 8:10, 15, 17:21)]
df_Medtronic_2016
# Rows whose TICKER is NA are labelled "MDT".
df_Medtronic_2016$TICKER <- replace(
  df_Medtronic_2016$TICKER, is.na(df_Medtronic_2016$TICKER), "MDT"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Medtronic_2016$PRC, na.rm = TRUE)
df_Medtronic_2016$PRC <- replace(
  df_Medtronic_2016$PRC, is.na(df_Medtronic_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Medtronic_2016$VOL, na.rm = TRUE)
df_Medtronic_2016$VOL <- replace(
  df_Medtronic_2016$VOL, is.na(df_Medtronic_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Medtronic_2016$BID, na.rm = TRUE)
df_Medtronic_2016$BID <- replace(
  df_Medtronic_2016$BID, is.na(df_Medtronic_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Medtronic_2016$ASK, na.rm = TRUE)
df_Medtronic_2016$ASK <- replace(
  df_Medtronic_2016$ASK, is.na(df_Medtronic_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Medtronic_2016$OPENPRC, na.rm = TRUE)
df_Medtronic_2016$OPENPRC <- replace(
  df_Medtronic_2016$OPENPRC, is.na(df_Medtronic_2016$OPENPRC), OPEN_AVG
)
df_Medtronic_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Medtronic_2016$text <- clean_tweets(df_Medtronic_2016$text)
df_Medtronic_2016$text <- lemmatize_words(df_Medtronic_2016$text)
# ---- Medtronic 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Medtronic_2018 <- df_Medtronic_2018[, -c(6, 8:10, 15, 17:21)]
df_Medtronic_2018
# Rows whose TICKER is NA are labelled "MDT".
df_Medtronic_2018$TICKER <- replace(
  df_Medtronic_2018$TICKER, is.na(df_Medtronic_2018$TICKER), "MDT"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Medtronic_2018$PRC, na.rm = TRUE)
df_Medtronic_2018$PRC <- replace(
  df_Medtronic_2018$PRC, is.na(df_Medtronic_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Medtronic_2018$VOL, na.rm = TRUE)
df_Medtronic_2018$VOL <- replace(
  df_Medtronic_2018$VOL, is.na(df_Medtronic_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Medtronic_2018$BID, na.rm = TRUE)
df_Medtronic_2018$BID <- replace(
  df_Medtronic_2018$BID, is.na(df_Medtronic_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Medtronic_2018$ASK, na.rm = TRUE)
df_Medtronic_2018$ASK <- replace(
  df_Medtronic_2018$ASK, is.na(df_Medtronic_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Medtronic_2018$OPENPRC, na.rm = TRUE)
df_Medtronic_2018$OPENPRC <- replace(
  df_Medtronic_2018$OPENPRC, is.na(df_Medtronic_2018$OPENPRC), OPEN_AVG
)
df_Medtronic_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Medtronic_2018$text <- clean_tweets(df_Medtronic_2018$text)
df_Medtronic_2018$text <- lemmatize_words(df_Medtronic_2018$text)
# ---- Juniper 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Juniper_2016 <- df_Juniper_2016[, -c(6, 8:10, 15, 17:21)]
df_Juniper_2016
# Rows whose TICKER is NA are labelled "JNPR".
df_Juniper_2016$TICKER <- replace(
  df_Juniper_2016$TICKER, is.na(df_Juniper_2016$TICKER), "JNPR"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Juniper_2016$PRC, na.rm = TRUE)
df_Juniper_2016$PRC <- replace(
  df_Juniper_2016$PRC, is.na(df_Juniper_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Juniper_2016$VOL, na.rm = TRUE)
df_Juniper_2016$VOL <- replace(
  df_Juniper_2016$VOL, is.na(df_Juniper_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Juniper_2016$BID, na.rm = TRUE)
df_Juniper_2016$BID <- replace(
  df_Juniper_2016$BID, is.na(df_Juniper_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Juniper_2016$ASK, na.rm = TRUE)
df_Juniper_2016$ASK <- replace(
  df_Juniper_2016$ASK, is.na(df_Juniper_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Juniper_2016$OPENPRC, na.rm = TRUE)
df_Juniper_2016$OPENPRC <- replace(
  df_Juniper_2016$OPENPRC, is.na(df_Juniper_2016$OPENPRC), OPEN_AVG
)
df_Juniper_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Juniper_2016$text <- clean_tweets(df_Juniper_2016$text)
df_Juniper_2016$text <- lemmatize_words(df_Juniper_2016$text)
# ---- Juniper 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Juniper_2018 <- df_Juniper_2018[, -c(6, 8:10, 15, 17:21)]
df_Juniper_2018
# Rows whose TICKER is NA are labelled "JNPR".
df_Juniper_2018$TICKER <- replace(
  df_Juniper_2018$TICKER, is.na(df_Juniper_2018$TICKER), "JNPR"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Juniper_2018$PRC, na.rm = TRUE)
df_Juniper_2018$PRC <- replace(
  df_Juniper_2018$PRC, is.na(df_Juniper_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Juniper_2018$VOL, na.rm = TRUE)
df_Juniper_2018$VOL <- replace(
  df_Juniper_2018$VOL, is.na(df_Juniper_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Juniper_2018$BID, na.rm = TRUE)
df_Juniper_2018$BID <- replace(
  df_Juniper_2018$BID, is.na(df_Juniper_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Juniper_2018$ASK, na.rm = TRUE)
df_Juniper_2018$ASK <- replace(
  df_Juniper_2018$ASK, is.na(df_Juniper_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Juniper_2018$OPENPRC, na.rm = TRUE)
df_Juniper_2018$OPENPRC <- replace(
  df_Juniper_2018$OPENPRC, is.na(df_Juniper_2018$OPENPRC), OPEN_AVG
)
df_Juniper_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Juniper_2018$text <- clean_tweets(df_Juniper_2018$text)
df_Juniper_2018$text <- lemmatize_words(df_Juniper_2018$text)
# ---- AetnaR 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_AetnaR_2018 <- df_AetnaR_2018[, -c(6, 8:10, 15, 17:21)]
df_AetnaR_2018
# Rows whose TICKER is NA are labelled "AET".
df_AetnaR_2018$TICKER <- replace(
  df_AetnaR_2018$TICKER, is.na(df_AetnaR_2018$TICKER), "AET"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_AetnaR_2018$PRC, na.rm = TRUE)
df_AetnaR_2018$PRC <- replace(
  df_AetnaR_2018$PRC, is.na(df_AetnaR_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_AetnaR_2018$VOL, na.rm = TRUE)
df_AetnaR_2018$VOL <- replace(
  df_AetnaR_2018$VOL, is.na(df_AetnaR_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_AetnaR_2018$BID, na.rm = TRUE)
df_AetnaR_2018$BID <- replace(
  df_AetnaR_2018$BID, is.na(df_AetnaR_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_AetnaR_2018$ASK, na.rm = TRUE)
df_AetnaR_2018$ASK <- replace(
  df_AetnaR_2018$ASK, is.na(df_AetnaR_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_AetnaR_2018$OPENPRC, na.rm = TRUE)
df_AetnaR_2018$OPENPRC <- replace(
  df_AetnaR_2018$OPENPRC, is.na(df_AetnaR_2018$OPENPRC), OPEN_AVG
)
df_AetnaR_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_AetnaR_2018$text <- clean_tweets(df_AetnaR_2018$text)
df_AetnaR_2018$text <- lemmatize_words(df_AetnaR_2018$text)
# ---- Netflix 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Netflix_2016 <- df_Netflix_2016[, -c(6, 8:10, 15, 17:21)]
df_Netflix_2016
# Rows whose TICKER is NA are labelled "NFLX".
df_Netflix_2016$TICKER <- replace(
  df_Netflix_2016$TICKER, is.na(df_Netflix_2016$TICKER), "NFLX"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Netflix_2016$PRC, na.rm = TRUE)
df_Netflix_2016$PRC <- replace(
  df_Netflix_2016$PRC, is.na(df_Netflix_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Netflix_2016$VOL, na.rm = TRUE)
df_Netflix_2016$VOL <- replace(
  df_Netflix_2016$VOL, is.na(df_Netflix_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Netflix_2016$BID, na.rm = TRUE)
df_Netflix_2016$BID <- replace(
  df_Netflix_2016$BID, is.na(df_Netflix_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Netflix_2016$ASK, na.rm = TRUE)
df_Netflix_2016$ASK <- replace(
  df_Netflix_2016$ASK, is.na(df_Netflix_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Netflix_2016$OPENPRC, na.rm = TRUE)
df_Netflix_2016$OPENPRC <- replace(
  df_Netflix_2016$OPENPRC, is.na(df_Netflix_2016$OPENPRC), OPEN_AVG
)
df_Netflix_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Netflix_2016$text <- clean_tweets(df_Netflix_2016$text)
df_Netflix_2016$text <- lemmatize_words(df_Netflix_2016$text)
# ---- Disney 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Disney_2018 <- df_Disney_2018[, -c(6, 8:10, 15, 17:21)]
df_Disney_2018
# Rows whose TICKER is NA are labelled "DIS".
df_Disney_2018$TICKER <- replace(
  df_Disney_2018$TICKER, is.na(df_Disney_2018$TICKER), "DIS"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Disney_2018$PRC, na.rm = TRUE)
df_Disney_2018$PRC <- replace(
  df_Disney_2018$PRC, is.na(df_Disney_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Disney_2018$VOL, na.rm = TRUE)
df_Disney_2018$VOL <- replace(
  df_Disney_2018$VOL, is.na(df_Disney_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Disney_2018$BID, na.rm = TRUE)
df_Disney_2018$BID <- replace(
  df_Disney_2018$BID, is.na(df_Disney_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Disney_2018$ASK, na.rm = TRUE)
df_Disney_2018$ASK <- replace(
  df_Disney_2018$ASK, is.na(df_Disney_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Disney_2018$OPENPRC, na.rm = TRUE)
df_Disney_2018$OPENPRC <- replace(
  df_Disney_2018$OPENPRC, is.na(df_Disney_2018$OPENPRC), OPEN_AVG
)
df_Disney_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Disney_2018$text <- clean_tweets(df_Disney_2018$text)
df_Disney_2018$text <- lemmatize_words(df_Disney_2018$text)
# ---- Fox 2015 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Fox_2015 <- df_Fox_2015[, -c(6, 8:10, 15, 17:21)]
df_Fox_2015
# Rows whose TICKER is NA are labelled "FOX".
df_Fox_2015$TICKER <- replace(
  df_Fox_2015$TICKER, is.na(df_Fox_2015$TICKER), "FOX"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Fox_2015$PRC, na.rm = TRUE)
df_Fox_2015$PRC <- replace(
  df_Fox_2015$PRC, is.na(df_Fox_2015$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Fox_2015$VOL, na.rm = TRUE)
df_Fox_2015$VOL <- replace(
  df_Fox_2015$VOL, is.na(df_Fox_2015$VOL), VOL_AVG
)
BID_AVG <- mean(df_Fox_2015$BID, na.rm = TRUE)
df_Fox_2015$BID <- replace(
  df_Fox_2015$BID, is.na(df_Fox_2015$BID), BID_AVG
)
ASK_AVG <- mean(df_Fox_2015$ASK, na.rm = TRUE)
df_Fox_2015$ASK <- replace(
  df_Fox_2015$ASK, is.na(df_Fox_2015$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Fox_2015$OPENPRC, na.rm = TRUE)
df_Fox_2015$OPENPRC <- replace(
  df_Fox_2015$OPENPRC, is.na(df_Fox_2015$OPENPRC), OPEN_AVG
)
df_Fox_2015
# Run the text column through clean_tweets(), then lemmatize_words().
df_Fox_2015$text <- clean_tweets(df_Fox_2015$text)
df_Fox_2015$text <- lemmatize_words(df_Fox_2015$text)
# ---- Microsoft 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Microsoft_2016 <- df_Microsoft_2016[, -c(6, 8:10, 15, 17:21)]
df_Microsoft_2016
# Rows whose TICKER is NA are labelled "MSFT".
df_Microsoft_2016$TICKER <- replace(
  df_Microsoft_2016$TICKER, is.na(df_Microsoft_2016$TICKER), "MSFT"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Microsoft_2016$PRC, na.rm = TRUE)
df_Microsoft_2016$PRC <- replace(
  df_Microsoft_2016$PRC, is.na(df_Microsoft_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Microsoft_2016$VOL, na.rm = TRUE)
df_Microsoft_2016$VOL <- replace(
  df_Microsoft_2016$VOL, is.na(df_Microsoft_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Microsoft_2016$BID, na.rm = TRUE)
df_Microsoft_2016$BID <- replace(
  df_Microsoft_2016$BID, is.na(df_Microsoft_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Microsoft_2016$ASK, na.rm = TRUE)
df_Microsoft_2016$ASK <- replace(
  df_Microsoft_2016$ASK, is.na(df_Microsoft_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Microsoft_2016$OPENPRC, na.rm = TRUE)
df_Microsoft_2016$OPENPRC <- replace(
  df_Microsoft_2016$OPENPRC, is.na(df_Microsoft_2016$OPENPRC), OPEN_AVG
)
df_Microsoft_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Microsoft_2016$text <- clean_tweets(df_Microsoft_2016$text)
df_Microsoft_2016$text <- lemmatize_words(df_Microsoft_2016$text)
# ---- Microsoft 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Microsoft_2018 <- df_Microsoft_2018[, -c(6, 8:10, 15, 17:21)]
df_Microsoft_2018
# Rows whose TICKER is NA are labelled "MSFT".
df_Microsoft_2018$TICKER <- replace(
  df_Microsoft_2018$TICKER, is.na(df_Microsoft_2018$TICKER), "MSFT"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Microsoft_2018$PRC, na.rm = TRUE)
df_Microsoft_2018$PRC <- replace(
  df_Microsoft_2018$PRC, is.na(df_Microsoft_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Microsoft_2018$VOL, na.rm = TRUE)
df_Microsoft_2018$VOL <- replace(
  df_Microsoft_2018$VOL, is.na(df_Microsoft_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Microsoft_2018$BID, na.rm = TRUE)
df_Microsoft_2018$BID <- replace(
  df_Microsoft_2018$BID, is.na(df_Microsoft_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Microsoft_2018$ASK, na.rm = TRUE)
df_Microsoft_2018$ASK <- replace(
  df_Microsoft_2018$ASK, is.na(df_Microsoft_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Microsoft_2018$OPENPRC, na.rm = TRUE)
df_Microsoft_2018$OPENPRC <- replace(
  df_Microsoft_2018$OPENPRC, is.na(df_Microsoft_2018$OPENPRC), OPEN_AVG
)
df_Microsoft_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Microsoft_2018$text <- clean_tweets(df_Microsoft_2018$text)
df_Microsoft_2018$text <- lemmatize_words(df_Microsoft_2018$text)
# ---- Juniper_Shaygan 2014 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Juniper_S_2014 <- df_Juniper_S_2014[, -c(6, 8:10, 15, 17:21)]
df_Juniper_S_2014
# Rows whose TICKER is NA are labelled "JNPR".
df_Juniper_S_2014$TICKER <- replace(
  df_Juniper_S_2014$TICKER, is.na(df_Juniper_S_2014$TICKER), "JNPR"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Juniper_S_2014$PRC, na.rm = TRUE)
df_Juniper_S_2014$PRC <- replace(
  df_Juniper_S_2014$PRC, is.na(df_Juniper_S_2014$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Juniper_S_2014$VOL, na.rm = TRUE)
df_Juniper_S_2014$VOL <- replace(
  df_Juniper_S_2014$VOL, is.na(df_Juniper_S_2014$VOL), VOL_AVG
)
BID_AVG <- mean(df_Juniper_S_2014$BID, na.rm = TRUE)
df_Juniper_S_2014$BID <- replace(
  df_Juniper_S_2014$BID, is.na(df_Juniper_S_2014$BID), BID_AVG
)
ASK_AVG <- mean(df_Juniper_S_2014$ASK, na.rm = TRUE)
df_Juniper_S_2014$ASK <- replace(
  df_Juniper_S_2014$ASK, is.na(df_Juniper_S_2014$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Juniper_S_2014$OPENPRC, na.rm = TRUE)
df_Juniper_S_2014$OPENPRC <- replace(
  df_Juniper_S_2014$OPENPRC, is.na(df_Juniper_S_2014$OPENPRC), OPEN_AVG
)
df_Juniper_S_2014
# Run the text column through clean_tweets(), then lemmatize_words().
df_Juniper_S_2014$text <- clean_tweets(df_Juniper_S_2014$text)
df_Juniper_S_2014$text <- lemmatize_words(df_Juniper_S_2014$text)
# ---- Juniper_Shaygan 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Juniper_S_2016 <- df_Juniper_S_2016[, -c(6, 8:10, 15, 17:21)]
df_Juniper_S_2016
# Rows whose TICKER is NA are labelled "JNPR".
df_Juniper_S_2016$TICKER <- replace(
  df_Juniper_S_2016$TICKER, is.na(df_Juniper_S_2016$TICKER), "JNPR"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Juniper_S_2016$PRC, na.rm = TRUE)
df_Juniper_S_2016$PRC <- replace(
  df_Juniper_S_2016$PRC, is.na(df_Juniper_S_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Juniper_S_2016$VOL, na.rm = TRUE)
df_Juniper_S_2016$VOL <- replace(
  df_Juniper_S_2016$VOL, is.na(df_Juniper_S_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Juniper_S_2016$BID, na.rm = TRUE)
df_Juniper_S_2016$BID <- replace(
  df_Juniper_S_2016$BID, is.na(df_Juniper_S_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Juniper_S_2016$ASK, na.rm = TRUE)
df_Juniper_S_2016$ASK <- replace(
  df_Juniper_S_2016$ASK, is.na(df_Juniper_S_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Juniper_S_2016$OPENPRC, na.rm = TRUE)
df_Juniper_S_2016$OPENPRC <- replace(
  df_Juniper_S_2016$OPENPRC, is.na(df_Juniper_S_2016$OPENPRC), OPEN_AVG
)
df_Juniper_S_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Juniper_S_2016$text <- clean_tweets(df_Juniper_S_2016$text)
df_Juniper_S_2016$text <- lemmatize_words(df_Juniper_S_2016$text)
# ---- Synchrony Financial 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_SYFM_2016 <- df_SYFM_2016[, -c(6, 8:10, 15, 17:21)]
df_SYFM_2016
# Rows whose TICKER is NA are labelled "SYF".
df_SYFM_2016$TICKER <- replace(
  df_SYFM_2016$TICKER, is.na(df_SYFM_2016$TICKER), "SYF"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_SYFM_2016$PRC, na.rm = TRUE)
df_SYFM_2016$PRC <- replace(
  df_SYFM_2016$PRC, is.na(df_SYFM_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_SYFM_2016$VOL, na.rm = TRUE)
df_SYFM_2016$VOL <- replace(
  df_SYFM_2016$VOL, is.na(df_SYFM_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_SYFM_2016$BID, na.rm = TRUE)
df_SYFM_2016$BID <- replace(
  df_SYFM_2016$BID, is.na(df_SYFM_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_SYFM_2016$ASK, na.rm = TRUE)
df_SYFM_2016$ASK <- replace(
  df_SYFM_2016$ASK, is.na(df_SYFM_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_SYFM_2016$OPENPRC, na.rm = TRUE)
df_SYFM_2016$OPENPRC <- replace(
  df_SYFM_2016$OPENPRC, is.na(df_SYFM_2016$OPENPRC), OPEN_AVG
)
df_SYFM_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_SYFM_2016$text <- clean_tweets(df_SYFM_2016$text)
df_SYFM_2016$text <- lemmatize_words(df_SYFM_2016$text)
# ---- Synchrony Financial 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_SYFM_2018 <- df_SYFM_2018[, -c(6, 8:10, 15, 17:21)]
df_SYFM_2018
# Rows whose TICKER is NA are labelled "SYF".
df_SYFM_2018$TICKER <- replace(
  df_SYFM_2018$TICKER, is.na(df_SYFM_2018$TICKER), "SYF"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_SYFM_2018$PRC, na.rm = TRUE)
df_SYFM_2018$PRC <- replace(
  df_SYFM_2018$PRC, is.na(df_SYFM_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_SYFM_2018$VOL, na.rm = TRUE)
df_SYFM_2018$VOL <- replace(
  df_SYFM_2018$VOL, is.na(df_SYFM_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_SYFM_2018$BID, na.rm = TRUE)
df_SYFM_2018$BID <- replace(
  df_SYFM_2018$BID, is.na(df_SYFM_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_SYFM_2018$ASK, na.rm = TRUE)
df_SYFM_2018$ASK <- replace(
  df_SYFM_2018$ASK, is.na(df_SYFM_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_SYFM_2018$OPENPRC, na.rm = TRUE)
df_SYFM_2018$OPENPRC <- replace(
  df_SYFM_2018$OPENPRC, is.na(df_SYFM_2018$OPENPRC), OPEN_AVG
)
df_SYFM_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_SYFM_2018$text <- clean_tweets(df_SYFM_2018$text)
df_SYFM_2018$text <- lemmatize_words(df_SYFM_2018$text)
# ---- Southern Company 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Southern_2016 <- df_Southern_2016[, -c(6, 8:10, 15, 17:21)]
df_Southern_2016
# Rows whose TICKER is NA are labelled "SO".
df_Southern_2016$TICKER <- replace(
  df_Southern_2016$TICKER, is.na(df_Southern_2016$TICKER), "SO"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Southern_2016$PRC, na.rm = TRUE)
df_Southern_2016$PRC <- replace(
  df_Southern_2016$PRC, is.na(df_Southern_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Southern_2016$VOL, na.rm = TRUE)
df_Southern_2016$VOL <- replace(
  df_Southern_2016$VOL, is.na(df_Southern_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Southern_2016$BID, na.rm = TRUE)
df_Southern_2016$BID <- replace(
  df_Southern_2016$BID, is.na(df_Southern_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Southern_2016$ASK, na.rm = TRUE)
df_Southern_2016$ASK <- replace(
  df_Southern_2016$ASK, is.na(df_Southern_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Southern_2016$OPENPRC, na.rm = TRUE)
df_Southern_2016$OPENPRC <- replace(
  df_Southern_2016$OPENPRC, is.na(df_Southern_2016$OPENPRC), OPEN_AVG
)
df_Southern_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Southern_2016$text <- clean_tweets(df_Southern_2016$text)
df_Southern_2016$text <- lemmatize_words(df_Southern_2016$text)
# ---- Southern Company 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Southern_2018 <- df_Southern_2018[, -c(6, 8:10, 15, 17:21)]
df_Southern_2018
# Rows whose TICKER is NA are labelled "SO".
df_Southern_2018$TICKER <- replace(
  df_Southern_2018$TICKER, is.na(df_Southern_2018$TICKER), "SO"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Southern_2018$PRC, na.rm = TRUE)
df_Southern_2018$PRC <- replace(
  df_Southern_2018$PRC, is.na(df_Southern_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Southern_2018$VOL, na.rm = TRUE)
df_Southern_2018$VOL <- replace(
  df_Southern_2018$VOL, is.na(df_Southern_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Southern_2018$BID, na.rm = TRUE)
df_Southern_2018$BID <- replace(
  df_Southern_2018$BID, is.na(df_Southern_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Southern_2018$ASK, na.rm = TRUE)
df_Southern_2018$ASK <- replace(
  df_Southern_2018$ASK, is.na(df_Southern_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Southern_2018$OPENPRC, na.rm = TRUE)
df_Southern_2018$OPENPRC <- replace(
  df_Southern_2018$OPENPRC, is.na(df_Southern_2018$OPENPRC), OPEN_AVG
)
df_Southern_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Southern_2018$text <- clean_tweets(df_Southern_2018$text)
df_Southern_2018$text <- lemmatize_words(df_Southern_2018$text)
# ---- Apple 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Apple_2016 <- df_Apple_2016[, -c(6, 8:10, 15, 17:21)]
df_Apple_2016
# Rows whose TICKER is NA are labelled "AAPL".
df_Apple_2016$TICKER <- replace(
  df_Apple_2016$TICKER, is.na(df_Apple_2016$TICKER), "AAPL"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Apple_2016$PRC, na.rm = TRUE)
df_Apple_2016$PRC <- replace(
  df_Apple_2016$PRC, is.na(df_Apple_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Apple_2016$VOL, na.rm = TRUE)
df_Apple_2016$VOL <- replace(
  df_Apple_2016$VOL, is.na(df_Apple_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Apple_2016$BID, na.rm = TRUE)
df_Apple_2016$BID <- replace(
  df_Apple_2016$BID, is.na(df_Apple_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Apple_2016$ASK, na.rm = TRUE)
df_Apple_2016$ASK <- replace(
  df_Apple_2016$ASK, is.na(df_Apple_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Apple_2016$OPENPRC, na.rm = TRUE)
df_Apple_2016$OPENPRC <- replace(
  df_Apple_2016$OPENPRC, is.na(df_Apple_2016$OPENPRC), OPEN_AVG
)
df_Apple_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Apple_2016$text <- clean_tweets(df_Apple_2016$text)
df_Apple_2016$text <- lemmatize_words(df_Apple_2016$text)
# ---- Apple 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Apple_2018 <- df_Apple_2018[, -c(6, 8:10, 15, 17:21)]
df_Apple_2018
# Rows whose TICKER is NA are labelled "AAPL".
df_Apple_2018$TICKER <- replace(
  df_Apple_2018$TICKER, is.na(df_Apple_2018$TICKER), "AAPL"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Apple_2018$PRC, na.rm = TRUE)
df_Apple_2018$PRC <- replace(
  df_Apple_2018$PRC, is.na(df_Apple_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Apple_2018$VOL, na.rm = TRUE)
df_Apple_2018$VOL <- replace(
  df_Apple_2018$VOL, is.na(df_Apple_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_Apple_2018$BID, na.rm = TRUE)
df_Apple_2018$BID <- replace(
  df_Apple_2018$BID, is.na(df_Apple_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_Apple_2018$ASK, na.rm = TRUE)
df_Apple_2018$ASK <- replace(
  df_Apple_2018$ASK, is.na(df_Apple_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Apple_2018$OPENPRC, na.rm = TRUE)
df_Apple_2018$OPENPRC <- replace(
  df_Apple_2018$OPENPRC, is.na(df_Apple_2018$OPENPRC), OPEN_AVG
)
df_Apple_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_Apple_2018$text <- clean_tweets(df_Apple_2018$text)
df_Apple_2018$text <- lemmatize_words(df_Apple_2018$text)
# ---- XL 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_XL_2016 <- df_XL_2016[, -c(6, 8:10, 15, 17:21)]
df_XL_2016
# Rows whose TICKER is NA are labelled "XL".
df_XL_2016$TICKER <- replace(
  df_XL_2016$TICKER, is.na(df_XL_2016$TICKER), "XL"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_XL_2016$PRC, na.rm = TRUE)
df_XL_2016$PRC <- replace(
  df_XL_2016$PRC, is.na(df_XL_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_XL_2016$VOL, na.rm = TRUE)
df_XL_2016$VOL <- replace(
  df_XL_2016$VOL, is.na(df_XL_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_XL_2016$BID, na.rm = TRUE)
df_XL_2016$BID <- replace(
  df_XL_2016$BID, is.na(df_XL_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_XL_2016$ASK, na.rm = TRUE)
df_XL_2016$ASK <- replace(
  df_XL_2016$ASK, is.na(df_XL_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_XL_2016$OPENPRC, na.rm = TRUE)
df_XL_2016$OPENPRC <- replace(
  df_XL_2016$OPENPRC, is.na(df_XL_2016$OPENPRC), OPEN_AVG
)
df_XL_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_XL_2016$text <- clean_tweets(df_XL_2016$text)
df_XL_2016$text <- lemmatize_words(df_XL_2016$text)
# ---- XL 2018 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_XL_2018 <- df_XL_2018[, -c(6, 8:10, 15, 17:21)]
df_XL_2018
# Rows whose TICKER is NA are labelled "XL".
df_XL_2018$TICKER <- replace(
  df_XL_2018$TICKER, is.na(df_XL_2018$TICKER), "XL"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_XL_2018$PRC, na.rm = TRUE)
df_XL_2018$PRC <- replace(
  df_XL_2018$PRC, is.na(df_XL_2018$PRC), PRC_AVG
)
VOL_AVG <- mean(df_XL_2018$VOL, na.rm = TRUE)
df_XL_2018$VOL <- replace(
  df_XL_2018$VOL, is.na(df_XL_2018$VOL), VOL_AVG
)
BID_AVG <- mean(df_XL_2018$BID, na.rm = TRUE)
df_XL_2018$BID <- replace(
  df_XL_2018$BID, is.na(df_XL_2018$BID), BID_AVG
)
ASK_AVG <- mean(df_XL_2018$ASK, na.rm = TRUE)
df_XL_2018$ASK <- replace(
  df_XL_2018$ASK, is.na(df_XL_2018$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_XL_2018$OPENPRC, na.rm = TRUE)
df_XL_2018$OPENPRC <- replace(
  df_XL_2018$OPENPRC, is.na(df_XL_2018$OPENPRC), OPEN_AVG
)
df_XL_2018
# Run the text column through clean_tweets(), then lemmatize_words().
df_XL_2018$text <- clean_tweets(df_XL_2018$text)
df_XL_2018$text <- lemmatize_words(df_XL_2018$text)
# ---- Tyson Foods 2016 ----
# Positional drop: columns 6, 8-10, 15 and 17-21 of the current column order
# are removed, then the trimmed frame is printed.
df_Tysonf_2016 <- df_Tysonf_2016[, -c(6, 8:10, 15, 17:21)]
df_Tysonf_2016
# Rows whose TICKER is NA are labelled "TSN".
df_Tysonf_2016$TICKER <- replace(
  df_Tysonf_2016$TICKER, is.na(df_Tysonf_2016$TICKER), "TSN"
)
# Mean imputation: every NA in PRC, VOL, BID, ASK and OPENPRC is replaced by
# the mean of that column's non-missing values.
PRC_AVG <- mean(df_Tysonf_2016$PRC, na.rm = TRUE)
df_Tysonf_2016$PRC <- replace(
  df_Tysonf_2016$PRC, is.na(df_Tysonf_2016$PRC), PRC_AVG
)
VOL_AVG <- mean(df_Tysonf_2016$VOL, na.rm = TRUE)
df_Tysonf_2016$VOL <- replace(
  df_Tysonf_2016$VOL, is.na(df_Tysonf_2016$VOL), VOL_AVG
)
BID_AVG <- mean(df_Tysonf_2016$BID, na.rm = TRUE)
df_Tysonf_2016$BID <- replace(
  df_Tysonf_2016$BID, is.na(df_Tysonf_2016$BID), BID_AVG
)
ASK_AVG <- mean(df_Tysonf_2016$ASK, na.rm = TRUE)
df_Tysonf_2016$ASK <- replace(
  df_Tysonf_2016$ASK, is.na(df_Tysonf_2016$ASK), ASK_AVG
)
OPEN_AVG <- mean(df_Tysonf_2016$OPENPRC, na.rm = TRUE)
df_Tysonf_2016$OPENPRC <- replace(
  df_Tysonf_2016$OPENPRC, is.na(df_Tysonf_2016$OPENPRC), OPEN_AVG
)
df_Tysonf_2016
# Run the text column through clean_tweets(), then lemmatize_words().
df_Tysonf_2016$text <- clean_tweets(df_Tysonf_2016$text)
df_Tysonf_2016$text <- lemmatize_words(df_Tysonf_2016$text)
# Tyson Foods 2018: trim columns, fill missing values, clean the text column.
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_Tysonf_2018 <- df_Tysonf_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Tysonf_2018
# Ticker: missing entries become "TSN"
df_Tysonf_2018$TICKER[is.na(df_Tysonf_2018$TICKER)] <- "TSN"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_Tysonf_2018$PRC, na.rm = TRUE)
df_Tysonf_2018$PRC[is.na(df_Tysonf_2018$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_Tysonf_2018$VOL, na.rm = TRUE)
df_Tysonf_2018$VOL[is.na(df_Tysonf_2018$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_Tysonf_2018$BID, na.rm = TRUE)
df_Tysonf_2018$BID[is.na(df_Tysonf_2018$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_Tysonf_2018$ASK, na.rm = TRUE)
df_Tysonf_2018$ASK[is.na(df_Tysonf_2018$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_Tysonf_2018$OPENPRC, na.rm = TRUE)
df_Tysonf_2018$OPENPRC[is.na(df_Tysonf_2018$OPENPRC)] <- OPEN_AVG
df_Tysonf_2018
# Run the text column through clean_tweets(), then lemmatize_words()
df_Tysonf_2018$text <- clean_tweets(df_Tysonf_2018$text)
df_Tysonf_2018$text <- lemmatize_words(df_Tysonf_2018$text)
# Akamai 2016: trim columns, fill missing values, clean the text column.
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_Akamai_2016 <- df_Akamai_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Akamai_2016
# Ticker: missing entries become "AKAM"
df_Akamai_2016$TICKER[is.na(df_Akamai_2016$TICKER)] <- "AKAM"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_Akamai_2016$PRC, na.rm = TRUE)
df_Akamai_2016$PRC[is.na(df_Akamai_2016$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_Akamai_2016$VOL, na.rm = TRUE)
df_Akamai_2016$VOL[is.na(df_Akamai_2016$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_Akamai_2016$BID, na.rm = TRUE)
df_Akamai_2016$BID[is.na(df_Akamai_2016$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_Akamai_2016$ASK, na.rm = TRUE)
df_Akamai_2016$ASK[is.na(df_Akamai_2016$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_Akamai_2016$OPENPRC, na.rm = TRUE)
df_Akamai_2016$OPENPRC[is.na(df_Akamai_2016$OPENPRC)] <- OPEN_AVG
df_Akamai_2016
# Run the text column through clean_tweets(), then lemmatize_words()
df_Akamai_2016$text <- clean_tweets(df_Akamai_2016$text)
df_Akamai_2016$text <- lemmatize_words(df_Akamai_2016$text)
# Akamai 2018: trim columns, fill missing values, clean the text column.
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_Akamai_2018 <- df_Akamai_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Akamai_2018
# Ticker: missing entries become "AKAM"
df_Akamai_2018$TICKER[is.na(df_Akamai_2018$TICKER)] <- "AKAM"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_Akamai_2018$PRC, na.rm = TRUE)
df_Akamai_2018$PRC[is.na(df_Akamai_2018$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_Akamai_2018$VOL, na.rm = TRUE)
df_Akamai_2018$VOL[is.na(df_Akamai_2018$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_Akamai_2018$BID, na.rm = TRUE)
df_Akamai_2018$BID[is.na(df_Akamai_2018$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_Akamai_2018$ASK, na.rm = TRUE)
df_Akamai_2018$ASK[is.na(df_Akamai_2018$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_Akamai_2018$OPENPRC, na.rm = TRUE)
df_Akamai_2018$OPENPRC[is.na(df_Akamai_2018$OPENPRC)] <- OPEN_AVG
df_Akamai_2018
# Run the text column through clean_tweets(), then lemmatize_words()
df_Akamai_2018$text <- clean_tweets(df_Akamai_2018$text)
df_Akamai_2018$text <- lemmatize_words(df_Akamai_2018$text)
# Google 2015: merge, save, trim columns, fill missing values, clean text.
# Keep every row of GOOGL_2015 and attach matching GOOGL_SP_2016 rows by date.
# NOTE(review): the 2015 frame is joined to a table named GOOGL_SP_2016 --
# confirm this is the intended table.
df_GOOGL_2015 <- merge(GOOGL_2015, GOOGL_SP_2016, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_GOOGL_2015, file = "Google_2015", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_GOOGL_2015 <- df_GOOGL_2015[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GOOGL_2015
# Ticker: missing entries become "GOOGL"
df_GOOGL_2015$TICKER[is.na(df_GOOGL_2015$TICKER)] <- "GOOGL"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_GOOGL_2015$PRC, na.rm = TRUE)
df_GOOGL_2015$PRC[is.na(df_GOOGL_2015$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_GOOGL_2015$VOL, na.rm = TRUE)
df_GOOGL_2015$VOL[is.na(df_GOOGL_2015$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_GOOGL_2015$BID, na.rm = TRUE)
df_GOOGL_2015$BID[is.na(df_GOOGL_2015$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_GOOGL_2015$ASK, na.rm = TRUE)
df_GOOGL_2015$ASK[is.na(df_GOOGL_2015$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_GOOGL_2015$OPENPRC, na.rm = TRUE)
df_GOOGL_2015$OPENPRC[is.na(df_GOOGL_2015$OPENPRC)] <- OPEN_AVG
df_GOOGL_2015
# Run the text column through clean_tweets(), then lemmatize_words()
df_GOOGL_2015$text <- clean_tweets(df_GOOGL_2015$text)
df_GOOGL_2015$text <- lemmatize_words(df_GOOGL_2015$text)
# Google 2018: merge, save, trim columns, fill missing values, clean text.
# Keep every row of GOOGL_2018 and attach matching GOOGLE_SP_2018 rows by date.
df_GOOGL_2018 <- merge(GOOGL_2018, GOOGLE_SP_2018, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_GOOGL_2018, file = "Google_2018", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_GOOGL_2018 <- df_GOOGL_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GOOGL_2018
# Ticker: missing entries become "GOOGL"
df_GOOGL_2018$TICKER[is.na(df_GOOGL_2018$TICKER)] <- "GOOGL"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_GOOGL_2018$PRC, na.rm = TRUE)
df_GOOGL_2018$PRC[is.na(df_GOOGL_2018$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_GOOGL_2018$VOL, na.rm = TRUE)
df_GOOGL_2018$VOL[is.na(df_GOOGL_2018$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_GOOGL_2018$BID, na.rm = TRUE)
df_GOOGL_2018$BID[is.na(df_GOOGL_2018$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_GOOGL_2018$ASK, na.rm = TRUE)
df_GOOGL_2018$ASK[is.na(df_GOOGL_2018$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_GOOGL_2018$OPENPRC, na.rm = TRUE)
df_GOOGL_2018$OPENPRC[is.na(df_GOOGL_2018$OPENPRC)] <- OPEN_AVG
df_GOOGL_2018
# Run the text column through clean_tweets(), then lemmatize_words()
df_GOOGL_2018$text <- clean_tweets(df_GOOGL_2018$text)
df_GOOGL_2018$text <- lemmatize_words(df_GOOGL_2018$text)
##Covid datasets
# WU (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of WU_covid and attach matching WU_SP_covid rows by date.
df_WU_covid <- merge(WU_covid, WU_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_WU_covid, file = "WU_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_WU_covid <- df_WU_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_WU_covid
# Ticker: missing entries become "WU"
df_WU_covid$TICKER[is.na(df_WU_covid$TICKER)] <- "WU"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_WU_covid$PRC, na.rm = TRUE)
df_WU_covid$PRC[is.na(df_WU_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_WU_covid$VOL, na.rm = TRUE)
df_WU_covid$VOL[is.na(df_WU_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_WU_covid$BID, na.rm = TRUE)
df_WU_covid$BID[is.na(df_WU_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_WU_covid$ASK, na.rm = TRUE)
df_WU_covid$ASK[is.na(df_WU_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_WU_covid$OPENPRC, na.rm = TRUE)
df_WU_covid$OPENPRC[is.na(df_WU_covid$OPENPRC)] <- OPEN_AVG
df_WU_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_WU_covid$text <- clean_tweets(df_WU_covid$text)
df_WU_covid$text <- lemmatize_words(df_WU_covid$text)
# AAPL (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of AAPL_covid and attach matching AAPL_SP_covid rows by date.
df_AAPL_covid <- merge(AAPL_covid, AAPL_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_AAPL_covid, file = "AAPL_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_AAPL_covid <- df_AAPL_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AAPL_covid
# Ticker: missing entries become "AAPL"
df_AAPL_covid$TICKER[is.na(df_AAPL_covid$TICKER)] <- "AAPL"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_AAPL_covid$PRC, na.rm = TRUE)
df_AAPL_covid$PRC[is.na(df_AAPL_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_AAPL_covid$VOL, na.rm = TRUE)
df_AAPL_covid$VOL[is.na(df_AAPL_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_AAPL_covid$BID, na.rm = TRUE)
df_AAPL_covid$BID[is.na(df_AAPL_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_AAPL_covid$ASK, na.rm = TRUE)
df_AAPL_covid$ASK[is.na(df_AAPL_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_AAPL_covid$OPENPRC, na.rm = TRUE)
df_AAPL_covid$OPENPRC[is.na(df_AAPL_covid$OPENPRC)] <- OPEN_AVG
df_AAPL_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_AAPL_covid$text <- clean_tweets(df_AAPL_covid$text)
df_AAPL_covid$text <- lemmatize_words(df_AAPL_covid$text)
# RMD (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of RMD_covid and attach matching RMD_SP_covid rows by date.
df_RMD_covid <- merge(RMD_covid, RMD_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_RMD_covid, file = "RMD_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_RMD_covid <- df_RMD_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_RMD_covid
# Ticker: missing entries become "RMD"
df_RMD_covid$TICKER[is.na(df_RMD_covid$TICKER)] <- "RMD"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_RMD_covid$PRC, na.rm = TRUE)
df_RMD_covid$PRC[is.na(df_RMD_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_RMD_covid$VOL, na.rm = TRUE)
df_RMD_covid$VOL[is.na(df_RMD_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_RMD_covid$BID, na.rm = TRUE)
df_RMD_covid$BID[is.na(df_RMD_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_RMD_covid$ASK, na.rm = TRUE)
df_RMD_covid$ASK[is.na(df_RMD_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_RMD_covid$OPENPRC, na.rm = TRUE)
df_RMD_covid$OPENPRC[is.na(df_RMD_covid$OPENPRC)] <- OPEN_AVG
df_RMD_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_RMD_covid$text <- clean_tweets(df_RMD_covid$text)
df_RMD_covid$text <- lemmatize_words(df_RMD_covid$text)
# GOOGL (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of GOOGL_covid and attach matching GOOGL_SP_covid rows by date.
df_GOOGL_covid <- merge(GOOGL_covid, GOOGL_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_GOOGL_covid, file = "GOOGL_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_GOOGL_covid <- df_GOOGL_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GOOGL_covid
# Ticker: missing entries become "GOOGL"
df_GOOGL_covid$TICKER[is.na(df_GOOGL_covid$TICKER)] <- "GOOGL"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_GOOGL_covid$PRC, na.rm = TRUE)
df_GOOGL_covid$PRC[is.na(df_GOOGL_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_GOOGL_covid$VOL, na.rm = TRUE)
df_GOOGL_covid$VOL[is.na(df_GOOGL_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_GOOGL_covid$BID, na.rm = TRUE)
df_GOOGL_covid$BID[is.na(df_GOOGL_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_GOOGL_covid$ASK, na.rm = TRUE)
df_GOOGL_covid$ASK[is.na(df_GOOGL_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_GOOGL_covid$OPENPRC, na.rm = TRUE)
df_GOOGL_covid$OPENPRC[is.na(df_GOOGL_covid$OPENPRC)] <- OPEN_AVG
df_GOOGL_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_GOOGL_covid$text <- clean_tweets(df_GOOGL_covid$text)
df_GOOGL_covid$text <- lemmatize_words(df_GOOGL_covid$text)
# AGN (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of AGN_covid and attach matching AGN_SP_covid rows by date.
df_AGN_covid <- merge(AGN_covid, AGN_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_AGN_covid, file = "AGN_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_AGN_covid <- df_AGN_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AGN_covid
# Ticker: missing entries become "AGN"
df_AGN_covid$TICKER[is.na(df_AGN_covid$TICKER)] <- "AGN"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_AGN_covid$PRC, na.rm = TRUE)
df_AGN_covid$PRC[is.na(df_AGN_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_AGN_covid$VOL, na.rm = TRUE)
df_AGN_covid$VOL[is.na(df_AGN_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_AGN_covid$BID, na.rm = TRUE)
df_AGN_covid$BID[is.na(df_AGN_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_AGN_covid$ASK, na.rm = TRUE)
df_AGN_covid$ASK[is.na(df_AGN_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_AGN_covid$OPENPRC, na.rm = TRUE)
df_AGN_covid$OPENPRC[is.na(df_AGN_covid$OPENPRC)] <- OPEN_AVG
df_AGN_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_AGN_covid$text <- clean_tweets(df_AGN_covid$text)
df_AGN_covid$text <- lemmatize_words(df_AGN_covid$text)
# Amazon (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of AMZN_covid and attach matching AMZN_SP_covid rows by date.
df_AMZN_covid <- merge(AMZN_covid, AMZN_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_AMZN_covid, file = "AMZN_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_AMZN_covid <- df_AMZN_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AMZN_covid
# Ticker: missing entries become "AMZN"
df_AMZN_covid$TICKER[is.na(df_AMZN_covid$TICKER)] <- "AMZN"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_AMZN_covid$PRC, na.rm = TRUE)
df_AMZN_covid$PRC[is.na(df_AMZN_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_AMZN_covid$VOL, na.rm = TRUE)
df_AMZN_covid$VOL[is.na(df_AMZN_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_AMZN_covid$BID, na.rm = TRUE)
df_AMZN_covid$BID[is.na(df_AMZN_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_AMZN_covid$ASK, na.rm = TRUE)
df_AMZN_covid$ASK[is.na(df_AMZN_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_AMZN_covid$OPENPRC, na.rm = TRUE)
df_AMZN_covid$OPENPRC[is.na(df_AMZN_covid$OPENPRC)] <- OPEN_AVG
df_AMZN_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_AMZN_covid$text <- clean_tweets(df_AMZN_covid$text)
df_AMZN_covid$text <- lemmatize_words(df_AMZN_covid$text)
# Disney (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of DIS_covid and attach matching DIS_SP_covid rows by date.
df_DIS_covid <- merge(DIS_covid, DIS_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_DIS_covid, file = "DIS_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_DIS_covid <- df_DIS_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_DIS_covid
# Ticker: missing entries become "DIS"
df_DIS_covid$TICKER[is.na(df_DIS_covid$TICKER)] <- "DIS"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_DIS_covid$PRC, na.rm = TRUE)
df_DIS_covid$PRC[is.na(df_DIS_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_DIS_covid$VOL, na.rm = TRUE)
df_DIS_covid$VOL[is.na(df_DIS_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_DIS_covid$BID, na.rm = TRUE)
df_DIS_covid$BID[is.na(df_DIS_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_DIS_covid$ASK, na.rm = TRUE)
df_DIS_covid$ASK[is.na(df_DIS_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_DIS_covid$OPENPRC, na.rm = TRUE)
df_DIS_covid$OPENPRC[is.na(df_DIS_covid$OPENPRC)] <- OPEN_AVG
df_DIS_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_DIS_covid$text <- clean_tweets(df_DIS_covid$text)
df_DIS_covid$text <- lemmatize_words(df_DIS_covid$text)
# Microsoft (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of MSFT_covid and attach matching MSFT_SP_covid rows by date.
df_MSFT_covid <- merge(MSFT_covid, MSFT_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_MSFT_covid, file = "MSFT_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_MSFT_covid <- df_MSFT_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_MSFT_covid
# Ticker: missing entries become "MSFT"
df_MSFT_covid$TICKER[is.na(df_MSFT_covid$TICKER)] <- "MSFT"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_MSFT_covid$PRC, na.rm = TRUE)
df_MSFT_covid$PRC[is.na(df_MSFT_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_MSFT_covid$VOL, na.rm = TRUE)
df_MSFT_covid$VOL[is.na(df_MSFT_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_MSFT_covid$BID, na.rm = TRUE)
df_MSFT_covid$BID[is.na(df_MSFT_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_MSFT_covid$ASK, na.rm = TRUE)
df_MSFT_covid$ASK[is.na(df_MSFT_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_MSFT_covid$OPENPRC, na.rm = TRUE)
df_MSFT_covid$OPENPRC[is.na(df_MSFT_covid$OPENPRC)] <- OPEN_AVG
df_MSFT_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_MSFT_covid$text <- clean_tweets(df_MSFT_covid$text)
df_MSFT_covid$text <- lemmatize_words(df_MSFT_covid$text)
# Medtronic (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of MDT_Covid and attach matching MDT_SP_covid rows by date.
# NOTE(review): MDT_Covid is capitalised differently from the other *_covid
# inputs; R names are case-sensitive, so confirm the object exists as spelled.
df_MDT_covid <- merge(MDT_Covid, MDT_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_MDT_covid, file = "MDT_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_MDT_covid <- df_MDT_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_MDT_covid
# Ticker: missing entries become "MDT"
df_MDT_covid$TICKER[is.na(df_MDT_covid$TICKER)] <- "MDT"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_MDT_covid$PRC, na.rm = TRUE)
df_MDT_covid$PRC[is.na(df_MDT_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_MDT_covid$VOL, na.rm = TRUE)
df_MDT_covid$VOL[is.na(df_MDT_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_MDT_covid$BID, na.rm = TRUE)
df_MDT_covid$BID[is.na(df_MDT_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_MDT_covid$ASK, na.rm = TRUE)
df_MDT_covid$ASK[is.na(df_MDT_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_MDT_covid$OPENPRC, na.rm = TRUE)
df_MDT_covid$OPENPRC[is.na(df_MDT_covid$OPENPRC)] <- OPEN_AVG
df_MDT_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_MDT_covid$text <- clean_tweets(df_MDT_covid$text)
df_MDT_covid$text <- lemmatize_words(df_MDT_covid$text)
# Red Hat (covid period): merge, save, trim columns, fill missing values, clean text.
# Keep every row of RHT_covid and attach matching RHT_SP_covid rows by date.
df_RHT_covid <- merge(RHT_covid, RHT_SP_covid, by = "date", all.x = TRUE)
# The merged frame is written out before any trimming or filling
write.csv(df_RHT_covid, file = "RHT_COVID", row.names = FALSE)
# NOTE(review): the drop is positional, so running it again on the trimmed
# frame removes further columns.
df_RHT_covid <- df_RHT_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_RHT_covid
# Ticker: missing entries become "RHT"
df_RHT_covid$TICKER[is.na(df_RHT_covid$TICKER)] <- "RHT"
# PRC: missing entries become the mean of the observed values
PRC_AVG <- mean(df_RHT_covid$PRC, na.rm = TRUE)
df_RHT_covid$PRC[is.na(df_RHT_covid$PRC)] <- PRC_AVG
# VOL
VOL_AVG <- mean(df_RHT_covid$VOL, na.rm = TRUE)
df_RHT_covid$VOL[is.na(df_RHT_covid$VOL)] <- VOL_AVG
# BID
BID_AVG <- mean(df_RHT_covid$BID, na.rm = TRUE)
df_RHT_covid$BID[is.na(df_RHT_covid$BID)] <- BID_AVG
# ASK
ASK_AVG <- mean(df_RHT_covid$ASK, na.rm = TRUE)
df_RHT_covid$ASK[is.na(df_RHT_covid$ASK)] <- ASK_AVG
# OPENPRC
OPEN_AVG <- mean(df_RHT_covid$OPENPRC, na.rm = TRUE)
df_RHT_covid$OPENPRC[is.na(df_RHT_covid$OPENPRC)] <- OPEN_AVG
df_RHT_covid
# Run the text column through clean_tweets(), then lemmatize_words()
df_RHT_covid$text <- clean_tweets(df_RHT_covid$text)
df_RHT_covid$text <- lemmatize_words(df_RHT_covid$text)
##Wordclouds
#Packages
library(wordcloud)
Loading required package: RColorBrewer
library(RColorBrewer)
library(wordcloud2)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(tm)
Loading required package: NLP
library("tidyverse")
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ───────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.2 ✔ purrr 0.3.4
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.2.1 ✔ stringr 1.5.0
✔ readr 2.1.4 ✔ forcats 0.5.2 ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
✖ ggplot2::annotate() masks NLP::annotate()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
# df_Test: word cloud built from the text column
text <- df_Test$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
# Johnson 2016: word cloud built from the text column
text <- df_Johnson_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Johnson 2017: word cloud built from the text column
text <- df_Johnson_2017$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Nasdaq: word cloud built from the text column
text <- df_NASDAQ$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Autodesk: word cloud built from the text column
text <- df_autodesk$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Hasbro 2016: word cloud built from the text column
text <- df_Hasbro_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Hasbro 2018: word cloud built from the text column
text <- df_Hasbro_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Intel 2016: word cloud built from the text column
text <- df_Intel_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Intel 2018: word cloud built from the text column
text <- df_Intel_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Activision 2016: word cloud built from the text column
text <- df_Activision_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Activision 2018: word cloud built from the text column
text <- df_Activision_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Inuit 2016: word cloud built from the text column
text <- df_Inuit_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Inuit 2018: word cloud built from the text column
text <- df_Inuit_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Allergan 2016: word cloud built from the text column
text <- df_Allergan_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Allergan 2018: word cloud built from the text column
text <- df_Allergan_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Humana 2016: word cloud built from the text column
text <- df_Humana_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Humana 2018: word cloud built from the text column
text <- df_Humana_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Autodesk CB 2016: word cloud built from the text column
text <- df_CB_autodesk_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Autodesk CB 2018: word cloud built from the text column
text <- df_CB_autodesk_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Equinox 2018: word cloud built from the text column
text <- df_equinox_2018$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

# Cisco 2016: word cloud built from the text column
text <- df_cisco_2016$text
# Build the corpus, then strip digits, punctuation and extra whitespace
docs <- Corpus(VectorSource(text))
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
# Lower-case everything and remove English stop words
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
# Recorded output: every tm_map() call in this section printed
# "Warning: transformation drops documents".
# NOTE(review): confirm nothing was lost, e.g. length(docs) == length(text).
# Per-term totals across all documents, largest first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

#Cisco 2018
#Word cloud of term frequencies in df_cisco_2018$text.
#vector containing only the text
text <- df_cisco_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#EBAY 2016
#Word cloud of term frequencies in df_EBAY_2016$text.
#vector containing only the text
text <- df_EBAY_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#EBAY 2018
#Word cloud of term frequencies in df_EBAY_2018$text.
#vector containing only the text
text <- df_EBAY_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Davita 2016
#Word cloud of term frequencies in df_Davita_2016$text.
#vector containing only the text
text <- df_Davita_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Davita 2017
#NOTE(review): this section was labelled "Davita 2018" but reads df_Davita_2017; confirm the intended year.
#Word cloud of term frequencies in df_Davita_2017$text.
#vector containing only the text
text <- df_Davita_2017$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Illumina 2016
#Word cloud of term frequencies in df_Illumina_2016$text.
#vector containing only the text
text <- df_Illumina_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Illumina 2018
#Word cloud of term frequencies in df_Illumina_2018$text.
#vector containing only the text
text <- df_Illumina_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Homedepot 2013
#Word cloud of term frequencies in df_Homedepot_2013$text.
#vector containing only the text
text <- df_Homedepot_2013$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Southwest Airlines 2016
#Word cloud of term frequencies in df_Southwest_2016$text.
#vector containing only the text
text <- df_Southwest_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Southwest Airlines 2018
#Word cloud of term frequencies in df_Southwest_2018$text.
#vector containing only the text
text <- df_Southwest_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#FIS 2016
#Word cloud of term frequencies in df_FIS_2016$text.
#vector containing only the text
text <- df_FIS_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#FIS 2018
#Word cloud of term frequencies in df_FIS_2018$text.
#vector containing only the text
text <- df_FIS_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Leucadia National 2016
#Word cloud of term frequencies in df_Leucadia_2016$text.
#vector containing only the text
text <- df_Leucadia_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Leucadia National 2018
#Word cloud of term frequencies in df_Leucadia_2018$text.
#vector containing only the text
text <- df_Leucadia_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Verizon 2018
#Word cloud of term frequencies in df_Verizon_2018$text.
#vector containing only the text
text <- df_Verizon_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Western Union 2016
#Word cloud of term frequencies in df_WU_2016$text.
#vector containing only the text
text <- df_WU_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Western Union 2018
#Word cloud of term frequencies in df_WU_2018$text.
#vector containing only the text
text <- df_WU_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#RedHat 2016
#Word cloud of term frequencies in df_RedHat_2016$text.
#vector containing only the text
text <- df_RedHat_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#RedHat 2018
#Word cloud of term frequencies in df_RedHat_2018$text.
#vector containing only the text
text <- df_RedHat_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Amazon 2016
#Word cloud of term frequencies in df_AMZN_2016$text.
#vector containing only the text
text <- df_AMZN_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Amazon 2018
#Word cloud of term frequencies in df_AMZN_2018$text.
#vector containing only the text
text <- df_AMZN_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#GE 2016
#Word cloud of term frequencies in df_GE_2016$text.
#vector containing only the text
text <- df_GE_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#GE 2017
#Word cloud of term frequencies in df_GE_2017$text.
#vector containing only the text
text <- df_GE_2017$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Fiserv 2016
#Word cloud of term frequencies in df_Fiserv_2016$text.
#vector containing only the text
text <- df_Fiserv_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Fiserv 2018
#Word cloud of term frequencies in df_Fiserv_2018$text.
#vector containing only the text
text <- df_Fiserv_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Waste Management 2018
#Word cloud of term frequencies in df_WM_2018$text.
#NOTE(review): most companies here have a 2016 and a 2018 section, but this one reads the 2018 frame and no 2016 section follows; confirm whether a 2016 cloud was intended.
#vector containing only the text
text <- df_WM_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Wills Tower 2016
#Word cloud of term frequencies in df_Wills_2016$text.
#(Waste Management 2018 is drawn by the preceding section, which reads df_WM_2018.)
#vector containing only the text
text <- df_Wills_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Wills Tower 2018
#Word cloud of term frequencies in df_Wills_2018$text.
#vector containing only the text
text <- df_Wills_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Tripadvisor 2016
#Word cloud of term frequencies in df_tripadvisor_2016$text.
#vector containing only the text
text <- df_tripadvisor_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

<!-- rnb-source-end -->
<!-- rnb-output-begin eyJkYXRhIjoiRXJyb3I6IGF0dGVtcHQgdG8gdXNlIHplcm8tbGVuZ3RoIHZhcmlhYmxlIG5hbWVcbiJ9 -->
Error: attempt to use zero-length variable name
<!-- rnb-output-end -->
<!-- rnb-chunk-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuI1RyaXBhZHZpc29yIDIwMThcblxuI3ZlY3RvciBjb250YWluaW5nIG9ubHkgdGhlIHRleHRcbnRleHQgPC0gZGZfdHJpcGFkdmlzb3JfMjAxOCR0ZXh0XG4jY29ycHVzICBcbmRvY3MgPC0gQ29ycHVzKFZlY3RvclNvdXJjZSh0ZXh0KSlcblxuZG9jcyA8LSBkb2NzICU+JVxuICB0bV9tYXAocmVtb3ZlTnVtYmVycykgJT4lXG4gIHRtX21hcChyZW1vdmVQdW5jdHVhdGlvbikgJT4lXG4gIHRtX21hcChzdHJpcFdoaXRlc3BhY2UpXG5gYGAifQ== -->
```r
#Tripadvisor 2018
#Word cloud of term frequencies in df_tripadvisor_2018$text.
#vector containing only the text
text <- df_tripadvisor_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#DavitaKent 2016
#Word cloud of term frequencies in df_DavitaK_2016$text.
#vector containing only the text
text <- df_DavitaK_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Davita Kent 2018
#Word cloud of term frequencies in df_DavitaK_2018$text.
#vector containing only the text
text <- df_DavitaK_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Starbucks 2018
#Word cloud of term frequencies in df_Starbucks_2018$text.
#vector containing only the text
text <- df_Starbucks_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#McCormick 2018
#Word cloud of term frequencies in df_McCormick_2018$text.
#vector containing only the text
text <- df_McCormick_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#IHS Markit 2018
#Word cloud of term frequencies in df_IHS_2018$text.
#vector containing only the text
text <- df_IHS_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#AMD 2018
#Word cloud of term frequencies in df_AMD_2018$text.
#vector containing only the text
text <- df_AMD_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#ResMed 2016
#Word cloud of term frequencies in df_ResMed_2016$text.
#vector containing only the text
text <- df_ResMed_2016$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#ResMed 2018
#Word cloud of term frequencies in df_ResMed_2018$text.
#vector containing only the text
text <- df_ResMed_2018$text
#corpus
docs <- Corpus(VectorSource(text))
#remove digits and punctuation, then collapse repeated whitespace
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
#convert to lower case
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
#remove English stop words
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
#term-document matrix, then total count per term sorted from most to least frequent
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
#draw the word cloud: at most 200 terms, most frequent plotted first
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#CA 2016
#vector containing only the text
text <- df_CA_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#CA 2018
#vector containing only the text
text <- df_CA_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#GM 2016
#vector containing only the text
text <- df_GM_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#GM 2018
#vector containing only the text
text <- df_GM_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Aetna 2016
#vector containing only the text
text <- df_Aetna_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Aetna 2018
#vector containing only the text
text <- df_Aetna_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#NRG 2016
#vector containing only the text
text <- df_NRG_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#NRG 2018
#vector containing only the text
text <- df_NRG_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Medtronic 2016
#vector containing only the text
text <- df_Medtronic_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Medtronic 2018
#vector containing only the text
text <- df_Medtronic_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Juniper 2016
#vector containing only the text
text <- df_Juniper_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Juniper 2018
#vector containing only the text
text <- df_Juniper_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Aetna Ramon
#vector containing only the text
text <- df_AetnaR_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Netflix
#vector containing only the text
text <- df_Netflix_2016$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Disney
#vector containing only the text
text <- df_Disney_2018$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

#Fox 2015
#vector containing only the text
text <- df_Fox_2015$text
#corpus
docs <- Corpus(VectorSource(text))
docs <- docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace)
Warning: transformation drops documentsWarning: transformation drops documentsWarning: transformation drops documents
docs <- tm_map(docs, content_transformer(tolower))
Warning: transformation drops documents
docs <- tm_map(docs, removeWords, stopwords("english"))
Warning: transformation drops documents
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix),decreasing=TRUE)
df_wc <- data.frame(word = names(words),freq=words)
wordcloud(words = df_wc$word, freq = df_wc$freq, min.freq = 1,max.words=200, random.order=FALSE, rot.per=0.35,colors=brewer.pal(8, "Dark2"))

---
title: "R Notebook"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*.

```{r}
plot(cars)
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

```{r setup}
# Set the working directory for every later chunk. A plain setwd() inside a
# chunk is undone when that chunk finishes, so it would not affect the chunks
# below that read and write files; knitr's root.dir option does persist.
knitr::opts_knit$set(root.dir = "/Users/jayshreenohar/Downloads")
```

## Merge datasets

```{r}
# Johnson ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Johnson_2016 <- merge(x = Johnson_2016_Tweets_CEO, y = Johnson_sp_2016, by = "date", all.x = TRUE)
write.csv(df_Johnson_2016, file = "Johnson_df_2016", row.names = FALSE)

# 2017
df_Johnson_2017 <- merge(x = Tweets_2017_Johnson, y = Johnson_sp_2017, by = "date", all.x = TRUE)
write.csv(df_Johnson_2017, file = "Johnson_df_2017", row.names = FALSE)

# Show the merged 2016 table
df_Johnson_2016
```

```{r}
df_Johnson_2017
```

```{r}
# Nasdaq, 2017-2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_NASDAQ <- merge(x = Tweets_Nasdaq, y = NASDAQ_new, by = "date", all.x = TRUE)
write.csv(df_NASDAQ, file = "Nasdaq_2017", row.names = FALSE)

# Show the merged table
df_NASDAQ
```

```{r}
# Autodesk, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_autodesk <- merge(x = Autodesk_Tweets, y = Autodesk_SP_2018, by = "date", all.x = TRUE)
write.csv(df_autodesk, file = "autodesk_2018", row.names = FALSE)

# Show the merged table
df_autodesk
```

```{r}
# Hasbro ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to its own file.

# 2018
df_Hasbro_2018 <- merge(Hasbro_Tweets_2018, Hasbro_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Hasbro_2018, file = "Hasbro_2018", row.names = FALSE)

# 2016
df_Hasbro_2016 <- merge(Hasbro_Tweets_2016, Hasbro_SP_2016, by = "date", all.x = TRUE)
# Saved as "Hasbro_2016": writing to "Hasbro_2018" here would overwrite the
# 2018 export made just above.
write.csv(df_Hasbro_2016, file = "Hasbro_2016", row.names = FALSE)

# Show the merged 2016 table
df_Hasbro_2016
```

```{r}
df_Hasbro_2018
```

```{r}
# Intel ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2018
df_Intel_2018 <- merge(x = Intel_Tweets_2018, y = Intel_2018_SP, by = "date", all.x = TRUE)
write.csv(df_Intel_2018, file = "Intel_2018", row.names = FALSE)

# 2016
df_Intel_2016 <- merge(x = Intel_Tweets_2016, y = Intel_2016, by = "date", all.x = TRUE)
write.csv(df_Intel_2016, file = "Intel_2016", row.names = FALSE)

# Show the merged 2016 table
df_Intel_2016
```

```{r}
df_Intel_2018
```

```{r}
# Activision ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Activision_2016 <- merge(x = Activision_Tweets_2016, y = Activision_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Activision_2016, file = "Activision_2016", row.names = FALSE)

# 2018
df_Activision_2018 <- merge(x = Activision_Tweets_2018, y = Activision_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Activision_2018, file = "Activision_2018", row.names = FALSE)

# Show the merged 2016 table
df_Activision_2016
```

```{r}
df_Activision_2018
```

```{r}
# Intuit ----
# (Several objects here are spelled "Inuit"; the names are kept as they are.)
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Inuit_2016 <- merge(x = Tweets_Inuit_2016, y = Inuit_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Inuit_2016, file = "Intuit_2016", row.names = FALSE)

# 2018
df_Inuit_2018 <- merge(x = Tweets_Inuit_2018, y = Intuit_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Inuit_2018, file = "Intuit_2018", row.names = FALSE)

# Show the merged 2016 table
df_Inuit_2016
```

```{r}
df_Inuit_2018
```

```{r}
# Allergan ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Allergan_2016 <- merge(x = Allergan_Tweets_2016, y = Allergan_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Allergan_2016, file = "Allergan_2016", row.names = FALSE)

# 2018
df_Allergan_2018 <- merge(x = Allergan_Tweets_2018, y = Allergan_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Allergan_2018, file = "Allergan_2018", row.names = FALSE)

# Show the merged 2016 table
df_Allergan_2016
```

```{r}
df_Allergan_2018
```

```{r}
# Humana ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Humana_2016 <- merge(x = Humana_Tweets_2016, y = Humana_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Humana_2016, file = "Humana_2016", row.names = FALSE)

# 2018
df_Humana_2018 <- merge(x = Humana_2018_Tweets, y = Humana_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Humana_2018, file = "Humana_2018", row.names = FALSE)

# Show the merged 2016 table
df_Humana_2016
```

```{r}
df_Humana_2018
```

```{r}
# Autodesk (Carl Bass) ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_CB_autodesk_2016 <- merge(x = Autodesk_CB_tweets_2016, y = Autodesk_CB_SP_2016, by = "date", all.x = TRUE)
write.csv(df_CB_autodesk_2016, file = "Autodesk_CB_2016", row.names = FALSE)

# "2018" object and file, built from the *_2017 input tables
# NOTE(review): confirm whether the 2017/2018 naming mismatch is intended.
df_CB_autodesk_2018 <- merge(x = Autodesk_CB_tweets_2017, y = Autodesk_2017_SP_CB, by = "date", all.x = TRUE)
write.csv(df_CB_autodesk_2018, file = "Autodesk_CB_2018", row.names = FALSE)

# Show the merged 2016 table
df_CB_autodesk_2016
```

```{r}
df_CB_autodesk_2018
```

```{r}
# Equinox, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_equinox_2018 <- merge(x = Equinox_2018_Tweets, y = Equinox_2018_SP, by = "date", all.x = TRUE)
write.csv(df_equinox_2018, file = "Equinox_2018", row.names = FALSE)
# Show the merged table
df_equinox_2018
```

```{r}
# Cisco ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_cisco_2016 <- merge(x = Cisco_Tweets_2016, y = Cisco_SP_2016, by = "date", all.x = TRUE)
write.csv(df_cisco_2016, file = "Cisco_2016", row.names = FALSE)

# 2018
df_cisco_2018 <- merge(x = Cisco_Tweets_2018, y = Cisco_SP_2018, by = "date", all.x = TRUE)
write.csv(df_cisco_2018, file = "Cisco_2018", row.names = FALSE)

# Show the merged 2016 table
df_cisco_2016
```

```{r}
df_cisco_2018
```

```{r}
# eBay ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_EBAY_2016 <- merge(x = EBAY_Tweets_2016, y = EBAY_2016_SP, by = "date", all.x = TRUE)
write.csv(df_EBAY_2016, file = "EBAY_2016", row.names = FALSE)

# 2018
df_EBAY_2018 <- merge(x = EBAY_Tweets_2018, y = EBAY_SP_2018, by = "date", all.x = TRUE)
write.csv(df_EBAY_2018, file = "EBAY_2018", row.names = FALSE)

# Show the merged 2016 table
df_EBAY_2016
```

```{r}
df_EBAY_2018
```

```{r}
# Davita ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Davita_2016 <- merge(x = Tweets_Davita_2016, y = Davita_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Davita_2016, file = "Davita_2016", row.names = FALSE)

# 2017
df_Davita_2017 <- merge(x = Davita_Inc_Tweets_2017, y = Davita_SP_2017, by = "date", all.x = TRUE)
write.csv(df_Davita_2017, file = "Davita_2017", row.names = FALSE)

# Show the merged 2016 table
df_Davita_2016
```

```{r}
df_Davita_2017
```

```{r}
# Illumina ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Illumina_2016 <- merge(x = Illumina_Tweets_2016, y = Illumina_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Illumina_2016, file = "Illumina_2016", row.names = FALSE)

# 2018 (inputs are the 2017_2018 tables)
df_Illumina_2018 <- merge(x = Illumina_Tweets_2017_2018, y = Illumina_2017_2018_SP, by = "date", all.x = TRUE)
write.csv(df_Illumina_2018, file = "Illumina_2018", row.names = FALSE)

# Show the merged 2016 table
df_Illumina_2016
```

```{r}
df_Illumina_2018
```

```{r}
# Home Depot, 2013-2014 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Homedepot_2013 <- merge(Home_depot_tweets_2013, Home_Depot_SP, by = "date", all.x = TRUE)
# row.names = FALSE matches the other exports in this section and avoids an
# extra unnamed index column in the saved file.
write.csv(df_Homedepot_2013, file = "Homedepot_2013", row.names = FALSE)
# Show the merged table
df_Homedepot_2013
```

```{r}
# Southwest Airlines ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Southwest_2016 <- merge(x = Southwest_Tweets_2016, y = Southwest_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Southwest_2016, file = "Southwest_2016", row.names = FALSE)

# 2018
df_Southwest_2018 <- merge(x = Southwest_Tweets_2018, y = Southwest_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Southwest_2018, file = "Southwest_2018", row.names = FALSE)

# Show the merged 2016 table
df_Southwest_2016
```

```{r}
df_Southwest_2018
```

```{r}
# FIS ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_FIS_2016 <- merge(x = FIS_Tweets_2016, y = FIS_2016_SP, by = "date", all.x = TRUE)
write.csv(df_FIS_2016, file = "FIS_2016", row.names = FALSE)

# 2018
df_FIS_2018 <- merge(x = FIS_Tweets_2018, y = FIS_2018_SP, by = "date", all.x = TRUE)
write.csv(df_FIS_2018, file = "FIS_2018", row.names = FALSE)

# Show the merged 2016 table
df_FIS_2016
```

```{r}
df_FIS_2018
```

```{r}
# Leucadia ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Leucadia_2016 <- merge(x = Leucadia_Tweets_2016, y = Leucadia_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Leucadia_2016, file = "Leucadia_2016", row.names = FALSE)

# 2018
df_Leucadia_2018 <- merge(x = Leucadia_2018_Tweets, y = Leucadia_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Leucadia_2018, file = "Leucadia_2018", row.names = FALSE)

# Show the merged 2016 table
df_Leucadia_2016
```

```{r}
df_Leucadia_2018
```

```{r}
# Verizon, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Verizon_2018 <- merge(x = Verizon_Tweets_2018, y = Verizon_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Verizon_2018, file = "Verizon_2018", row.names = FALSE)

# Show the merged table
df_Verizon_2018
```

```{r}
# Western Union ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2018
df_WU_2018 <- merge(x = Western_Union_Tweets_2018, y = WU_SP_2018, by = "date", all.x = TRUE)
write.csv(df_WU_2018, file = "WU_2018", row.names = FALSE)

# 2016
df_WU_2016 <- merge(x = Western_Union_Tweets_2016, y = WU_SP_2016, by = "date", all.x = TRUE)
write.csv(df_WU_2016, file = "WU_2016", row.names = FALSE)

# Show the merged 2016 table
df_WU_2016
```

```{r}
df_WU_2018
```

```{r}
# Red Hat ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_RedHat_2016 <- merge(x = Red_Hat_Tweets_2016, y = Red_Hat_SP_2016, by = "date", all.x = TRUE)
write.csv(df_RedHat_2016, file = "RedHat_2016", row.names = FALSE)

# 2018
df_RedHat_2018 <- merge(x = Red_Hat_Tweets_2018, y = Red_Hat_SP_2018, by = "date", all.x = TRUE)
write.csv(df_RedHat_2018, file = "RedHat_2018", row.names = FALSE)

# Show the merged 2016 table
df_RedHat_2016
```

```{r}
df_RedHat_2018
```

```{r}
# AMZN ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_AMZN_2016 <- merge(x = AMZN_Tweets_2016, y = AMZN_SP_2016, by = "date", all.x = TRUE)
write.csv(df_AMZN_2016, file = "AMZN_2016", row.names = FALSE)

# 2018
df_AMZN_2018 <- merge(x = AMZN_Tweets_2018, y = AMZN_SP_2018, by = "date", all.x = TRUE)
write.csv(df_AMZN_2018, file = "AMZN_2018", row.names = FALSE)

# Show the merged 2016 table
df_AMZN_2016
```

```{r}
df_AMZN_2018
```

```{r}
# GE ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_GE_2016 <- merge(x = GE_Tweets_2016, y = GE_2016_SP, by = "date", all.x = TRUE)
write.csv(df_GE_2016, file = "GE_2016", row.names = FALSE)

# 2017
df_GE_2017 <- merge(x = GE_Tweets_2017, y = GE_2017_SP, by = "date", all.x = TRUE)
write.csv(df_GE_2017, file = "GE_2017", row.names = FALSE)

# Show the merged 2016 table
df_GE_2016
```

```{r}
df_GE_2017
```

```{r}
# Fiserv ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Fiserv_2016 <- merge(x = Fiserv_Tweets_2016, y = Fiserv_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Fiserv_2016, file = "Fiserv_2016", row.names = FALSE)

# 2018
df_Fiserv_2018 <- merge(x = Fiserv_Tweets_2018, y = Fiserv_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Fiserv_2018, file = "Fiserv_2018", row.names = FALSE)

# Show the merged 2016 table
df_Fiserv_2016
```

```{r}
df_Fiserv_2018
```

```{r}
# Waste Management, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_WM_2018 <- merge(x = WM_Tweets_2018, y = WM_SP_2018, by = "date", all.x = TRUE)
write.csv(df_WM_2018, file = "WM_2018", row.names = FALSE)
# Show the merged table
df_WM_2018
```

```{r}
# Wills Tower ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Wills_2016 <- merge(x = Wills_Tower_Tweets_2016, y = Wills_Tower_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Wills_2016, file = "Wills_2016", row.names = FALSE)

# 2018
df_Wills_2018 <- merge(x = Wills_Tower_Tweets_2018, y = WLTW_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Wills_2018, file = "Wills_2018", row.names = FALSE)

# Show the merged 2016 table
df_Wills_2016
```

```{r}
df_Wills_2018
```

```{r}
# Tripadvisor ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_tripadvisor_2016 <- merge(x = Tripadvisor_Tweets_2016, y = Tripadvisor_SP_2016, by = "date", all.x = TRUE)
write.csv(df_tripadvisor_2016, file = "Tripadvisor_2016", row.names = FALSE)

# 2018
df_tripadvisor_2018 <- merge(x = Tripadvisor_Tweets_2018, y = Tripadvisor_SP_2018, by = "date", all.x = TRUE)
write.csv(df_tripadvisor_2018, file = "Tripadvisor_2018", row.names = FALSE)

# Show the merged 2016 table
df_tripadvisor_2016
```

```{r}
df_tripadvisor_2018
```

```{r}
# Davita (Kent) ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_DavitaK_2016 <- merge(x = Davita_Tweets_2016K, y = Davita_SP_2016, by = "date", all.x = TRUE)
write.csv(df_DavitaK_2016, file = "Davita_K_2016", row.names = FALSE)

# 2018
df_DavitaK_2018 <- merge(x = Davita_Tweets_2018K, y = Davita_SP_2018, by = "date", all.x = TRUE)
write.csv(df_DavitaK_2018, file = "Davita_K_2018", row.names = FALSE)

# Show the merged 2016 table
df_DavitaK_2016
```

```{r}
df_DavitaK_2018
```

```{r}
# Starbucks, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Starbucks_2018 <- merge(x = Starbucks_Tweets_2018, y = Starbucks_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Starbucks_2018, file = "Starbucks_2018", row.names = FALSE)

# Show the merged table
df_Starbucks_2018
```

```{r}
# McCormick, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_McCormick_2018 <- merge(x = McCormick_Tweets_2018, y = McCormick_SP_2018, by = "date", all.x = TRUE)
write.csv(df_McCormick_2018, file = "McCormick_2018", row.names = FALSE)

# Show the merged table
df_McCormick_2018
```

```{r}
# IHS Markit, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_IHS_2018 <- merge(x = IHS_Tweets_2018, y = IHS_SP_2018, by = "date", all.x = TRUE)
write.csv(df_IHS_2018, file = "IHS_2018", row.names = FALSE)

# Show the merged table
df_IHS_2018
```

```{r}
# AMD, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_AMD_2018 <- merge(x = AMD_Tweets_2018, y = AMD_SP_2018, by = "date", all.x = TRUE)
write.csv(df_AMD_2018, file = "AMD_2018", row.names = FALSE)

# Show the merged table
df_AMD_2018
```

```{r}
# ResMed ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_ResMed_2016 <- merge(x = ResMed_Tweets_2016, y = ResMed_SP_2016, by = "date", all.x = TRUE)
write.csv(df_ResMed_2016, file = "ResMed_2016", row.names = FALSE)

# 2018
df_ResMed_2018 <- merge(x = ResMed_Tweets_2018, y = ResMed_SP_2018, by = "date", all.x = TRUE)
write.csv(df_ResMed_2018, file = "ResMed_2018", row.names = FALSE)

# Show the merged 2016 table
df_ResMed_2016
```

```{r}
df_ResMed_2018
```

```{r}
# CA ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_CA_2016 <- merge(x = CA_Tweets_2016, y = CA_SP_2016, by = "date", all.x = TRUE)
write.csv(df_CA_2016, file = "CA_2016", row.names = FALSE)

# 2018
df_CA_2018 <- merge(x = CA_Tweets_2018, y = CA_SP_2018, by = "date", all.x = TRUE)
write.csv(df_CA_2018, file = "CA_2018", row.names = FALSE)

# Show the merged 2016 table
df_CA_2016
```

```{r}
df_CA_2018
```

```{r}
# General Motors ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_GM_2016 <- merge(x = GM_Tweets_2016, y = GM_SP_2016, by = "date", all.x = TRUE)
write.csv(df_GM_2016, file = "GM_2016", row.names = FALSE)

# 2018
df_GM_2018 <- merge(x = GM_Tweets_2018, y = GM_SP_2018, by = "date", all.x = TRUE)
write.csv(df_GM_2018, file = "GM_2018", row.names = FALSE)

# Show the merged 2016 table
df_GM_2016
```

```{r}
df_GM_2018
```

```{r}
# Aetna ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Aetna_2016 <- merge(x = Aetna_Tweets_2016, y = AET_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Aetna_2016, file = "Aetna_2016", row.names = FALSE)

# 2018
df_Aetna_2018 <- merge(x = Aetna_Tweets_2018, y = Aetna_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Aetna_2018, file = "Aetna_2018", row.names = FALSE)

# Show the merged 2016 table
df_Aetna_2016
```

```{r}
df_Aetna_2018
```

```{r}
# NRG ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_NRG_2016 <- merge(x = NRG_Tweets_2016, y = NRG_SP_2016, by = "date", all.x = TRUE)
write.csv(df_NRG_2016, file = "NRG_2016", row.names = FALSE)

# 2018
df_NRG_2018 <- merge(x = NRG_Tweets_2018, y = NRG_SP_2018, by = "date", all.x = TRUE)
write.csv(df_NRG_2018, file = "NRG_2018", row.names = FALSE)

# Show the merged 2016 table
df_NRG_2016
```

```{r}
df_NRG_2018
```

```{r}
# Medtronic ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Medtronic_2016 <- merge(x = Medtronic_Tweets_2016, y = Medtronic_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Medtronic_2016, file = "Medtronic_2016", row.names = FALSE)

# 2018
df_Medtronic_2018 <- merge(x = Medtronic_Tweets_2018, y = Medtronic_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Medtronic_2018, file = "Medtronic_2018", row.names = FALSE)

# Show the merged 2016 table
df_Medtronic_2016
```

```{r}
df_Medtronic_2018
```

```{r}
# Juniper ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Juniper_2016 <- merge(x = Juniper_Tweets_2016, y = Juniper_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Juniper_2016, file = "Juniper_2016", row.names = FALSE)

# 2018
df_Juniper_2018 <- merge(x = Juniper_Tweets_2018, y = Juniper_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Juniper_2018, file = "Juniper_2018", row.names = FALSE)

# Show the merged 2016 table
df_Juniper_2016
```

```{r}
df_Juniper_2018
```

```{r}
# Aetna (Ramon), 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_AetnaR_2018 <- merge(x = Aetna_Tweets_2018R, y = AetnaR_SP_2018, by = "date", all.x = TRUE)
write.csv(df_AetnaR_2018, file = "AetnaR_2018", row.names = FALSE)

# Show the merged table
df_AetnaR_2018
```

```{r}
# Netflix, 2016 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Netflix_2016 <- merge(x = Netflix_Tweets_2016, y = Netflix_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Netflix_2016, file = "Netflix_2016", row.names = FALSE)

# Show the merged table
df_Netflix_2016
```

```{r}
# Disney, 2018 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Disney_2018 <- merge(x = Disney_Tweets_2018, y = Disney_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Disney_2018, file = "Disney_2018", row.names = FALSE)

# Show the merged table
df_Disney_2018
```

```{r}
# Fox, 2015 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Fox_2015 <- merge(x = Fox_Tweets_2015, y = Fox_SP_2015, by = "date", all.x = TRUE)
write.csv(df_Fox_2015, file = "Fox_2015", row.names = FALSE)

# Show the merged table
df_Fox_2015
```

```{r}
# Microsoft ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Microsoft_2016 <- merge(x = Microsoft_Tweets_2016, y = Microsoft_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Microsoft_2016, file = "Microsoft_2016", row.names = FALSE)

# 2018
df_Microsoft_2018 <- merge(x = Microsoft_Tweets_2018, y = Microsoft_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Microsoft_2018, file = "Microsoft_2018", row.names = FALSE)

# Show the merged 2016 table
df_Microsoft_2016
```

```{r}
df_Microsoft_2018
```

```{r}
# Juniper (Shaygan), 2014 ----
# Left join on `date`; all.x = TRUE keeps every row of the first table.
df_Juniper_S_2014 <- merge(x = JuniperS_Tweets_2014, y = Juniper_SP_2014, by = "date", all.x = TRUE)
write.csv(df_Juniper_S_2014, file = "Juniper_S_2014", row.names = FALSE)

# Show the merged table
df_Juniper_S_2014

```

```{r}
# NOTE(review): no merge in this section creates df_Juniper_S_2016 (the chunk
# above only builds df_Juniper_S_2014) — confirm it is defined elsewhere,
# otherwise this line fails with "object not found".
df_Juniper_S_2016
```

```{r}
# Synchrony Financial ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_SYFM_2016 <- merge(x = SYFM_Tweets_2016, y = SYF_SP_2016, by = "date", all.x = TRUE)
write.csv(df_SYFM_2016, file = "SYFM_2016", row.names = FALSE)

# 2018
df_SYFM_2018 <- merge(x = SYFM_Tweets_2018, y = SYFM_SP_2018, by = "date", all.x = TRUE)
write.csv(df_SYFM_2018, file = "SYFM_2018", row.names = FALSE)

# Show the merged 2016 table
df_SYFM_2016
```

```{r}
df_SYFM_2018
```

```{r}
# Southern Company ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Southern_2016 <- merge(x = SouthernC_Tweets_2016, y = SO_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Southern_2016, file = "Southern_2016", row.names = FALSE)

# 2018
df_Southern_2018 <- merge(x = SouthernC_Tweets_2018, y = SO_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Southern_2018, file = "Southern_2018", row.names = FALSE)

# Show the merged 2016 table
df_Southern_2016
```

```{r}
df_Southern_2018
```

```{r}
# Apple ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Apple_2016 <- merge(x = Apple_Tweets_2016, y = Apple_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Apple_2016, file = "Apple_2016", row.names = FALSE)

# 2018
df_Apple_2018 <- merge(x = Apple_Tweets_2018, y = Apple_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Apple_2018, file = "Apple_2018", row.names = FALSE)

# Show the merged 2016 table
df_Apple_2016
```

```{r}
df_Apple_2018
```

```{r}
# XL ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_XL_2016 <- merge(x = XL_Tweets_2016, y = XL_SP_2016, by = "date", all.x = TRUE)
write.csv(df_XL_2016, file = "XL_2016", row.names = FALSE)

# 2018
df_XL_2018 <- merge(x = XL_Tweets_2018, y = XL_SP_2018, by = "date", all.x = TRUE)
write.csv(df_XL_2018, file = "XL_2018", row.names = FALSE)

# Show the merged 2016 table
df_XL_2016
```

```{r}
df_XL_2018
```

```{r}
# Tyson Foods ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Tysonf_2016 <- merge(x = TSN_Tweets_2016, y = TSN_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Tysonf_2016, file = "TSN_2016", row.names = FALSE)

# 2018
df_Tysonf_2018 <- merge(x = TSN_Tweets_2018, y = TSN_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Tysonf_2018, file = "TSN_2018", row.names = FALSE)

# Show the merged 2016 table
df_Tysonf_2016
```

```{r}
df_Tysonf_2018
```

```{r}
# Akamai ----
# Left join on `date` (all.x = TRUE keeps every row of the first table),
# then save each merged table to disk without row names.

# 2016
df_Akamai_2016 <- merge(x = Akamai_Tweets_2016, y = Akamai_SP_2016, by = "date", all.x = TRUE)
write.csv(df_Akamai_2016, file = "Akamai_2016", row.names = FALSE)

# 2018
df_Akamai_2018 <- merge(x = Akamai_Tweets_2018, y = Akamai_SP_2018, by = "date", all.x = TRUE)
write.csv(df_Akamai_2018, file = "Akamai_2018", row.names = FALSE)

# Show the merged 2016 table
df_Akamai_2016
```

```{r}
df_Akamai_2018
```

### Clean

```{r}
# Packages used by the cleaning and lemmatisation chunks below
library(tm)
library(stringr)
# Install textstem only when it is missing, so re-running or knitting the
# notebook does not reinstall the package every time.
if (!requireNamespace("textstem", quietly = TRUE)) {
  install.packages("textstem")
}
library("textstem")




```

```{r}
# Function to clean the tweets
#
# Lower-cases the text and strips URLs, @mentions, #hashtags, English stop
# words, punctuation marks and digits, then collapses leftover whitespace.
#
# tweets: character vector of raw tweet texts.
# Returns a character vector of the same length holding the cleaned texts.
clean_tweets <- function(tweets) {
  # Convert text to lower case (the stop-word list is lower case as well)
  tweets <- tolower(tweets)

  # Remove URLs, mentions and hashtags; each one runs up to the next whitespace
  tweets <- stringr::str_replace_all(tweets, "http[^[:space:]]*", "")
  tweets <- stringr::str_replace_all(tweets, "@[^[:space:]]*", "")
  tweets <- stringr::str_replace_all(tweets, "#[^[:space:]]*", "")

  # Remove common stop words BEFORE stripping punctuation: the list contains
  # contractions such as "don't", which can no longer match once the
  # apostrophe is gone. Curly apostrophes are normalised first so that
  # typographic contractions match too.
  tweets <- stringr::str_replace_all(tweets, "[\u2018\u2019]", "'")
  tweets <- tm::removeWords(tweets, tm::stopwords("en"))

  # Remove punctuation marks
  tweets <- stringr::str_replace_all(tweets, "[[:punct:]]", "")

  # Remove numbers
  tweets <- stringr::str_replace_all(tweets, "[[:digit:]]", "")

  # Collapse the gaps left by the removals and trim both ends
  stringr::str_squish(tweets)
}

df_Test$text <- clean_tweets(df_Test$text)

```

```{r}
# Lemmatize
# lemmatize_strings() splits each tweet into words first; lemmatize_words()
# expects one word per element, so it would not lemmatize whole tweets.
df_Test$text <- lemmatize_strings(df_Test$text)


```

# Numeric NA values & Tweets

```{r}
# Johnson
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Johnson_2016 <- df_Johnson_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Johnson_2016

# Fill missing tickers with "JCI".
df_Johnson_2016$TICKER[is.na(df_Johnson_2016$TICKER)] <- "JCI"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Johnson_2016$PRC, na.rm = TRUE)
df_Johnson_2016$PRC[is.na(df_Johnson_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Johnson_2016$VOL, na.rm = TRUE)
df_Johnson_2016$VOL[is.na(df_Johnson_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Johnson_2016$BID, na.rm = TRUE)
df_Johnson_2016$BID[is.na(df_Johnson_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Johnson_2016$ASK, na.rm = TRUE)
df_Johnson_2016$ASK[is.na(df_Johnson_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Johnson_2016$OPENPRC, na.rm = TRUE)
df_Johnson_2016$OPENPRC[is.na(df_Johnson_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Johnson_2016
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Johnson_2016$text <- clean_tweets(df_Johnson_2016$text)
df_Johnson_2016$text <- lemmatize_strings(df_Johnson_2016$text)
```


```{r}
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Johnson_2017 <- df_Johnson_2017[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Johnson_2017

# Fill missing tickers with "JCI".
df_Johnson_2017$TICKER[is.na(df_Johnson_2017$TICKER)] <- "JCI"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Johnson_2017$PRC, na.rm = TRUE)
df_Johnson_2017$PRC[is.na(df_Johnson_2017$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Johnson_2017$VOL, na.rm = TRUE)
df_Johnson_2017$VOL[is.na(df_Johnson_2017$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Johnson_2017$BID, na.rm = TRUE)
df_Johnson_2017$BID[is.na(df_Johnson_2017$BID)] <- BID_AVG

ASK_AVG <- mean(df_Johnson_2017$ASK, na.rm = TRUE)
df_Johnson_2017$ASK[is.na(df_Johnson_2017$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Johnson_2017$OPENPRC, na.rm = TRUE)
df_Johnson_2017$OPENPRC[is.na(df_Johnson_2017$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Johnson_2017
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Johnson_2017$text <- clean_tweets(df_Johnson_2017$text)
df_Johnson_2017$text <- lemmatize_strings(df_Johnson_2017$text)
```

```{r}
# NASDAQ
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_NASDAQ <- df_NASDAQ[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_NASDAQ

# Fill missing tickers with "NDAQ".
df_NASDAQ$TICKER[is.na(df_NASDAQ$TICKER)] <- "NDAQ"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_NASDAQ$PRC, na.rm = TRUE)
df_NASDAQ$PRC[is.na(df_NASDAQ$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_NASDAQ$VOL, na.rm = TRUE)
df_NASDAQ$VOL[is.na(df_NASDAQ$VOL)] <- VOL_AVG

BID_AVG <- mean(df_NASDAQ$BID, na.rm = TRUE)
df_NASDAQ$BID[is.na(df_NASDAQ$BID)] <- BID_AVG

ASK_AVG <- mean(df_NASDAQ$ASK, na.rm = TRUE)
df_NASDAQ$ASK[is.na(df_NASDAQ$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_NASDAQ$OPENPRC, na.rm = TRUE)
df_NASDAQ$OPENPRC[is.na(df_NASDAQ$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_NASDAQ
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_NASDAQ$text <- clean_tweets(df_NASDAQ$text)
df_NASDAQ$text <- lemmatize_strings(df_NASDAQ$text)
```


```{r}
```
```{r}
# Autodesk
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_autodesk <- df_autodesk[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_autodesk

# Fill missing tickers with "ADSK".
df_autodesk$TICKER[is.na(df_autodesk$TICKER)] <- "ADSK"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_autodesk$PRC, na.rm = TRUE)
df_autodesk$PRC[is.na(df_autodesk$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_autodesk$VOL, na.rm = TRUE)
df_autodesk$VOL[is.na(df_autodesk$VOL)] <- VOL_AVG

BID_AVG <- mean(df_autodesk$BID, na.rm = TRUE)
df_autodesk$BID[is.na(df_autodesk$BID)] <- BID_AVG

ASK_AVG <- mean(df_autodesk$ASK, na.rm = TRUE)
df_autodesk$ASK[is.na(df_autodesk$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_autodesk$OPENPRC, na.rm = TRUE)
df_autodesk$OPENPRC[is.na(df_autodesk$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_autodesk
```           
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_autodesk$text <- clean_tweets(df_autodesk$text)
df_autodesk$text <- lemmatize_strings(df_autodesk$text)
```

```{r}
# Hasbro
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Hasbro_2016 <- df_Hasbro_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Hasbro_2016

# Fill missing tickers with "HAS".
df_Hasbro_2016$TICKER[is.na(df_Hasbro_2016$TICKER)] <- "HAS"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Hasbro_2016$PRC, na.rm = TRUE)
df_Hasbro_2016$PRC[is.na(df_Hasbro_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Hasbro_2016$VOL, na.rm = TRUE)
df_Hasbro_2016$VOL[is.na(df_Hasbro_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Hasbro_2016$BID, na.rm = TRUE)
df_Hasbro_2016$BID[is.na(df_Hasbro_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Hasbro_2016$ASK, na.rm = TRUE)
df_Hasbro_2016$ASK[is.na(df_Hasbro_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Hasbro_2016$OPENPRC, na.rm = TRUE)
df_Hasbro_2016$OPENPRC[is.na(df_Hasbro_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Hasbro_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Hasbro_2016$text <- clean_tweets(df_Hasbro_2016$text)
df_Hasbro_2016$text <- lemmatize_strings(df_Hasbro_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Hasbro_2018 <- df_Hasbro_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Hasbro_2018

# Fill missing tickers with "HAS".
df_Hasbro_2018$TICKER[is.na(df_Hasbro_2018$TICKER)] <- "HAS"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Hasbro_2018$PRC, na.rm = TRUE)
df_Hasbro_2018$PRC[is.na(df_Hasbro_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Hasbro_2018$VOL, na.rm = TRUE)
df_Hasbro_2018$VOL[is.na(df_Hasbro_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Hasbro_2018$BID, na.rm = TRUE)
df_Hasbro_2018$BID[is.na(df_Hasbro_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Hasbro_2018$ASK, na.rm = TRUE)
df_Hasbro_2018$ASK[is.na(df_Hasbro_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Hasbro_2018$OPENPRC, na.rm = TRUE)
df_Hasbro_2018$OPENPRC[is.na(df_Hasbro_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Hasbro_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Hasbro_2018$text <- clean_tweets(df_Hasbro_2018$text)
df_Hasbro_2018$text <- lemmatize_strings(df_Hasbro_2018$text)
```
```{r}
# Intel
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Intel_2016 <- df_Intel_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Intel_2016

# Fill missing tickers with "INTC".
df_Intel_2016$TICKER[is.na(df_Intel_2016$TICKER)] <- "INTC"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Intel_2016$PRC, na.rm = TRUE)
df_Intel_2016$PRC[is.na(df_Intel_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Intel_2016$VOL, na.rm = TRUE)
df_Intel_2016$VOL[is.na(df_Intel_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Intel_2016$BID, na.rm = TRUE)
df_Intel_2016$BID[is.na(df_Intel_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Intel_2016$ASK, na.rm = TRUE)
df_Intel_2016$ASK[is.na(df_Intel_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Intel_2016$OPENPRC, na.rm = TRUE)
df_Intel_2016$OPENPRC[is.na(df_Intel_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Intel_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Intel_2016$text <- clean_tweets(df_Intel_2016$text)
df_Intel_2016$text <- lemmatize_strings(df_Intel_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Intel_2018 <- df_Intel_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Intel_2018

# Fill missing tickers with "INTC".
df_Intel_2018$TICKER[is.na(df_Intel_2018$TICKER)] <- "INTC"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Intel_2018$PRC, na.rm = TRUE)
df_Intel_2018$PRC[is.na(df_Intel_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Intel_2018$VOL, na.rm = TRUE)
df_Intel_2018$VOL[is.na(df_Intel_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Intel_2018$BID, na.rm = TRUE)
df_Intel_2018$BID[is.na(df_Intel_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Intel_2018$ASK, na.rm = TRUE)
df_Intel_2018$ASK[is.na(df_Intel_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Intel_2018$OPENPRC, na.rm = TRUE)
df_Intel_2018$OPENPRC[is.na(df_Intel_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Intel_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Intel_2018$text <- clean_tweets(df_Intel_2018$text)
df_Intel_2018$text <- lemmatize_strings(df_Intel_2018$text)
```

```{r}
# Activision
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Activision_2016 <- df_Activision_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Activision_2016

# Fill missing tickers with "ATVI".
df_Activision_2016$TICKER[is.na(df_Activision_2016$TICKER)] <- "ATVI"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Activision_2016$PRC, na.rm = TRUE)
df_Activision_2016$PRC[is.na(df_Activision_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Activision_2016$VOL, na.rm = TRUE)
df_Activision_2016$VOL[is.na(df_Activision_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Activision_2016$BID, na.rm = TRUE)
df_Activision_2016$BID[is.na(df_Activision_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Activision_2016$ASK, na.rm = TRUE)
df_Activision_2016$ASK[is.na(df_Activision_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Activision_2016$OPENPRC, na.rm = TRUE)
df_Activision_2016$OPENPRC[is.na(df_Activision_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Activision_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Activision_2016$text <- clean_tweets(df_Activision_2016$text)
df_Activision_2016$text <- lemmatize_strings(df_Activision_2016$text)
```


```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Activision_2018 <- df_Activision_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Activision_2018

# Fill missing tickers with "ATVI".
df_Activision_2018$TICKER[is.na(df_Activision_2018$TICKER)] <- "ATVI"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Activision_2018$PRC, na.rm = TRUE)
df_Activision_2018$PRC[is.na(df_Activision_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Activision_2018$VOL, na.rm = TRUE)
df_Activision_2018$VOL[is.na(df_Activision_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Activision_2018$BID, na.rm = TRUE)
df_Activision_2018$BID[is.na(df_Activision_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Activision_2018$ASK, na.rm = TRUE)
df_Activision_2018$ASK[is.na(df_Activision_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Activision_2018$OPENPRC, na.rm = TRUE)
df_Activision_2018$OPENPRC[is.na(df_Activision_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Activision_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Activision_2018$text <- clean_tweets(df_Activision_2018$text)
df_Activision_2018$text <- lemmatize_strings(df_Activision_2018$text)
```

```{r}
# Inuit
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Inuit_2016 <- df_Inuit_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Inuit_2016

# Fill missing tickers with "INTU".
df_Inuit_2016$TICKER[is.na(df_Inuit_2016$TICKER)] <- "INTU"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Inuit_2016$PRC, na.rm = TRUE)
df_Inuit_2016$PRC[is.na(df_Inuit_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Inuit_2016$VOL, na.rm = TRUE)
df_Inuit_2016$VOL[is.na(df_Inuit_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Inuit_2016$BID, na.rm = TRUE)
df_Inuit_2016$BID[is.na(df_Inuit_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Inuit_2016$ASK, na.rm = TRUE)
df_Inuit_2016$ASK[is.na(df_Inuit_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Inuit_2016$OPENPRC, na.rm = TRUE)
df_Inuit_2016$OPENPRC[is.na(df_Inuit_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Inuit_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Inuit_2016$text <- clean_tweets(df_Inuit_2016$text)
df_Inuit_2016$text <- lemmatize_strings(df_Inuit_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Inuit_2018 <- df_Inuit_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Inuit_2018

# Fill missing tickers with "INTU".
df_Inuit_2018$TICKER[is.na(df_Inuit_2018$TICKER)] <- "INTU"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Inuit_2018$PRC, na.rm = TRUE)
df_Inuit_2018$PRC[is.na(df_Inuit_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Inuit_2018$VOL, na.rm = TRUE)
df_Inuit_2018$VOL[is.na(df_Inuit_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Inuit_2018$BID, na.rm = TRUE)
df_Inuit_2018$BID[is.na(df_Inuit_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Inuit_2018$ASK, na.rm = TRUE)
df_Inuit_2018$ASK[is.na(df_Inuit_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Inuit_2018$OPENPRC, na.rm = TRUE)
df_Inuit_2018$OPENPRC[is.na(df_Inuit_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Inuit_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Inuit_2018$text <- clean_tweets(df_Inuit_2018$text)
df_Inuit_2018$text <- lemmatize_strings(df_Inuit_2018$text)
```

```{r}
# Allergan
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Allergan_2016 <- df_Allergan_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Allergan_2016

# Fill missing tickers with "AGN".
df_Allergan_2016$TICKER[is.na(df_Allergan_2016$TICKER)] <- "AGN"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Allergan_2016$PRC, na.rm = TRUE)
df_Allergan_2016$PRC[is.na(df_Allergan_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Allergan_2016$VOL, na.rm = TRUE)
df_Allergan_2016$VOL[is.na(df_Allergan_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Allergan_2016$BID, na.rm = TRUE)
df_Allergan_2016$BID[is.na(df_Allergan_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Allergan_2016$ASK, na.rm = TRUE)
df_Allergan_2016$ASK[is.na(df_Allergan_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Allergan_2016$OPENPRC, na.rm = TRUE)
df_Allergan_2016$OPENPRC[is.na(df_Allergan_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Allergan_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Allergan_2016$text <- clean_tweets(df_Allergan_2016$text)
df_Allergan_2016$text <- lemmatize_strings(df_Allergan_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Allergan_2018 <- df_Allergan_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Allergan_2018

# Fill missing tickers with "AGN".
df_Allergan_2018$TICKER[is.na(df_Allergan_2018$TICKER)] <- "AGN"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Allergan_2018$PRC, na.rm = TRUE)
df_Allergan_2018$PRC[is.na(df_Allergan_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Allergan_2018$VOL, na.rm = TRUE)
df_Allergan_2018$VOL[is.na(df_Allergan_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Allergan_2018$BID, na.rm = TRUE)
df_Allergan_2018$BID[is.na(df_Allergan_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Allergan_2018$ASK, na.rm = TRUE)
df_Allergan_2018$ASK[is.na(df_Allergan_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Allergan_2018$OPENPRC, na.rm = TRUE)
df_Allergan_2018$OPENPRC[is.na(df_Allergan_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Allergan_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Allergan_2018$text <- clean_tweets(df_Allergan_2018$text)
df_Allergan_2018$text <- lemmatize_strings(df_Allergan_2018$text)
```

```{r}
# Humana
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Humana_2016 <- df_Humana_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Humana_2016

# Fill missing tickers with "HUM".
df_Humana_2016$TICKER[is.na(df_Humana_2016$TICKER)] <- "HUM"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Humana_2016$PRC, na.rm = TRUE)
df_Humana_2016$PRC[is.na(df_Humana_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Humana_2016$VOL, na.rm = TRUE)
df_Humana_2016$VOL[is.na(df_Humana_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Humana_2016$BID, na.rm = TRUE)
df_Humana_2016$BID[is.na(df_Humana_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Humana_2016$ASK, na.rm = TRUE)
df_Humana_2016$ASK[is.na(df_Humana_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Humana_2016$OPENPRC, na.rm = TRUE)
df_Humana_2016$OPENPRC[is.na(df_Humana_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Humana_2016
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Humana_2016$text <- clean_tweets(df_Humana_2016$text)
df_Humana_2016$text <- lemmatize_strings(df_Humana_2016$text)
```
```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Humana_2018 <- df_Humana_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Humana_2018

# Fill missing tickers with "HUM".
df_Humana_2018$TICKER[is.na(df_Humana_2018$TICKER)] <- "HUM"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Humana_2018$PRC, na.rm = TRUE)
df_Humana_2018$PRC[is.na(df_Humana_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Humana_2018$VOL, na.rm = TRUE)
df_Humana_2018$VOL[is.na(df_Humana_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Humana_2018$BID, na.rm = TRUE)
df_Humana_2018$BID[is.na(df_Humana_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Humana_2018$ASK, na.rm = TRUE)
df_Humana_2018$ASK[is.na(df_Humana_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Humana_2018$OPENPRC, na.rm = TRUE)
df_Humana_2018$OPENPRC[is.na(df_Humana_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Humana_2018

```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Humana_2018$text <- clean_tweets(df_Humana_2018$text)
df_Humana_2018$text <- lemmatize_strings(df_Humana_2018$text)
```

```{r}
# CB Autodesk
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_CB_autodesk_2016 <- df_CB_autodesk_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_CB_autodesk_2016

# Fill missing tickers with "ADSK".
df_CB_autodesk_2016$TICKER[is.na(df_CB_autodesk_2016$TICKER)] <- "ADSK"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_CB_autodesk_2016$PRC, na.rm = TRUE)
df_CB_autodesk_2016$PRC[is.na(df_CB_autodesk_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_CB_autodesk_2016$VOL, na.rm = TRUE)
df_CB_autodesk_2016$VOL[is.na(df_CB_autodesk_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_CB_autodesk_2016$BID, na.rm = TRUE)
df_CB_autodesk_2016$BID[is.na(df_CB_autodesk_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_CB_autodesk_2016$ASK, na.rm = TRUE)
df_CB_autodesk_2016$ASK[is.na(df_CB_autodesk_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_CB_autodesk_2016$OPENPRC, na.rm = TRUE)
df_CB_autodesk_2016$OPENPRC[is.na(df_CB_autodesk_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_CB_autodesk_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_CB_autodesk_2016$text <- clean_tweets(df_CB_autodesk_2016$text)
df_CB_autodesk_2016$text <- lemmatize_strings(df_CB_autodesk_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_CB_autodesk_2018 <- df_CB_autodesk_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_CB_autodesk_2018

# Fill missing tickers with "ADSK".
df_CB_autodesk_2018$TICKER[is.na(df_CB_autodesk_2018$TICKER)] <- "ADSK"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_CB_autodesk_2018$PRC, na.rm = TRUE)
df_CB_autodesk_2018$PRC[is.na(df_CB_autodesk_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_CB_autodesk_2018$VOL, na.rm = TRUE)
df_CB_autodesk_2018$VOL[is.na(df_CB_autodesk_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_CB_autodesk_2018$BID, na.rm = TRUE)
df_CB_autodesk_2018$BID[is.na(df_CB_autodesk_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_CB_autodesk_2018$ASK, na.rm = TRUE)
df_CB_autodesk_2018$ASK[is.na(df_CB_autodesk_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_CB_autodesk_2018$OPENPRC, na.rm = TRUE)
df_CB_autodesk_2018$OPENPRC[is.na(df_CB_autodesk_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_CB_autodesk_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_CB_autodesk_2018$text <- clean_tweets(df_CB_autodesk_2018$text)
df_CB_autodesk_2018$text <- lemmatize_strings(df_CB_autodesk_2018$text)
```
```{r}
# Equinox
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_equinox_2018 <- df_equinox_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_equinox_2018

# Fill missing tickers with "EQIX".
df_equinox_2018$TICKER[is.na(df_equinox_2018$TICKER)] <- "EQIX"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_equinox_2018$PRC, na.rm = TRUE)
df_equinox_2018$PRC[is.na(df_equinox_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_equinox_2018$VOL, na.rm = TRUE)
df_equinox_2018$VOL[is.na(df_equinox_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_equinox_2018$BID, na.rm = TRUE)
df_equinox_2018$BID[is.na(df_equinox_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_equinox_2018$ASK, na.rm = TRUE)
df_equinox_2018$ASK[is.na(df_equinox_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_equinox_2018$OPENPRC, na.rm = TRUE)
df_equinox_2018$OPENPRC[is.na(df_equinox_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_equinox_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_equinox_2018$text <- clean_tweets(df_equinox_2018$text)
df_equinox_2018$text <- lemmatize_strings(df_equinox_2018$text)
```

```{r}
# Cisco
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_cisco_2016 <- df_cisco_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_cisco_2016

# Fill missing tickers with "CSCO".
df_cisco_2016$TICKER[is.na(df_cisco_2016$TICKER)] <- "CSCO"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_cisco_2016$PRC, na.rm = TRUE)
df_cisco_2016$PRC[is.na(df_cisco_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_cisco_2016$VOL, na.rm = TRUE)
df_cisco_2016$VOL[is.na(df_cisco_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_cisco_2016$BID, na.rm = TRUE)
df_cisco_2016$BID[is.na(df_cisco_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_cisco_2016$ASK, na.rm = TRUE)
df_cisco_2016$ASK[is.na(df_cisco_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_cisco_2016$OPENPRC, na.rm = TRUE)
df_cisco_2016$OPENPRC[is.na(df_cisco_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_cisco_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_cisco_2016$text <- clean_tweets(df_cisco_2016$text)
df_cisco_2016$text <- lemmatize_strings(df_cisco_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_cisco_2018 <- df_cisco_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_cisco_2018

# Fill missing tickers with "CSCO".
df_cisco_2018$TICKER[is.na(df_cisco_2018$TICKER)] <- "CSCO"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_cisco_2018$PRC, na.rm = TRUE)
df_cisco_2018$PRC[is.na(df_cisco_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_cisco_2018$VOL, na.rm = TRUE)
df_cisco_2018$VOL[is.na(df_cisco_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_cisco_2018$BID, na.rm = TRUE)
df_cisco_2018$BID[is.na(df_cisco_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_cisco_2018$ASK, na.rm = TRUE)
df_cisco_2018$ASK[is.na(df_cisco_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_cisco_2018$OPENPRC, na.rm = TRUE)
df_cisco_2018$OPENPRC[is.na(df_cisco_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_cisco_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_cisco_2018$text <- clean_tweets(df_cisco_2018$text)
df_cisco_2018$text <- lemmatize_strings(df_cisco_2018$text)
```

```{r}
# EBAY
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_EBAY_2016 <- df_EBAY_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_EBAY_2016

# Fill missing tickers with "EBAY".
df_EBAY_2016$TICKER[is.na(df_EBAY_2016$TICKER)] <- "EBAY"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_EBAY_2016$PRC, na.rm = TRUE)
df_EBAY_2016$PRC[is.na(df_EBAY_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_EBAY_2016$VOL, na.rm = TRUE)
df_EBAY_2016$VOL[is.na(df_EBAY_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_EBAY_2016$BID, na.rm = TRUE)
df_EBAY_2016$BID[is.na(df_EBAY_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_EBAY_2016$ASK, na.rm = TRUE)
df_EBAY_2016$ASK[is.na(df_EBAY_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_EBAY_2016$OPENPRC, na.rm = TRUE)
df_EBAY_2016$OPENPRC[is.na(df_EBAY_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_EBAY_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_EBAY_2016$text <- clean_tweets(df_EBAY_2016$text)
df_EBAY_2016$text <- lemmatize_strings(df_EBAY_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_EBAY_2018 <- df_EBAY_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_EBAY_2018

# Fill missing tickers with "EBAY".
df_EBAY_2018$TICKER[is.na(df_EBAY_2018$TICKER)] <- "EBAY"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_EBAY_2018$PRC, na.rm = TRUE)
df_EBAY_2018$PRC[is.na(df_EBAY_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_EBAY_2018$VOL, na.rm = TRUE)
df_EBAY_2018$VOL[is.na(df_EBAY_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_EBAY_2018$BID, na.rm = TRUE)
df_EBAY_2018$BID[is.na(df_EBAY_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_EBAY_2018$ASK, na.rm = TRUE)
df_EBAY_2018$ASK[is.na(df_EBAY_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_EBAY_2018$OPENPRC, na.rm = TRUE)
df_EBAY_2018$OPENPRC[is.na(df_EBAY_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_EBAY_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_EBAY_2018$text <- clean_tweets(df_EBAY_2018$text)
df_EBAY_2018$text <- lemmatize_strings(df_EBAY_2018$text)
```

```{r}
# Davita
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Davita_2016 <- df_Davita_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Davita_2016

# Fill missing tickers with "DVA".
df_Davita_2016$TICKER[is.na(df_Davita_2016$TICKER)] <- "DVA"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Davita_2016$PRC, na.rm = TRUE)
df_Davita_2016$PRC[is.na(df_Davita_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Davita_2016$VOL, na.rm = TRUE)
df_Davita_2016$VOL[is.na(df_Davita_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Davita_2016$BID, na.rm = TRUE)
df_Davita_2016$BID[is.na(df_Davita_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Davita_2016$ASK, na.rm = TRUE)
df_Davita_2016$ASK[is.na(df_Davita_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Davita_2016$OPENPRC, na.rm = TRUE)
df_Davita_2016$OPENPRC[is.na(df_Davita_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Davita_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Davita_2016$text <- clean_tweets(df_Davita_2016$text)
df_Davita_2016$text <- lemmatize_strings(df_Davita_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Davita_2017 <- df_Davita_2017[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Davita_2017

# Fill missing tickers with "DVA".
df_Davita_2017$TICKER[is.na(df_Davita_2017$TICKER)] <- "DVA"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Davita_2017$PRC, na.rm = TRUE)
df_Davita_2017$PRC[is.na(df_Davita_2017$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Davita_2017$VOL, na.rm = TRUE)
df_Davita_2017$VOL[is.na(df_Davita_2017$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Davita_2017$BID, na.rm = TRUE)
df_Davita_2017$BID[is.na(df_Davita_2017$BID)] <- BID_AVG

ASK_AVG <- mean(df_Davita_2017$ASK, na.rm = TRUE)
df_Davita_2017$ASK[is.na(df_Davita_2017$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Davita_2017$OPENPRC, na.rm = TRUE)
df_Davita_2017$OPENPRC[is.na(df_Davita_2017$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Davita_2017
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Davita_2017$text <- clean_tweets(df_Davita_2017$text)
df_Davita_2017$text <- lemmatize_strings(df_Davita_2017$text)
```

```{r}
# Illumina
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Illumina_2016 <- df_Illumina_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Illumina_2016

# Fill missing tickers with "ILMN".
df_Illumina_2016$TICKER[is.na(df_Illumina_2016$TICKER)] <- "ILMN"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Illumina_2016$PRC, na.rm = TRUE)
df_Illumina_2016$PRC[is.na(df_Illumina_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Illumina_2016$VOL, na.rm = TRUE)
df_Illumina_2016$VOL[is.na(df_Illumina_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Illumina_2016$BID, na.rm = TRUE)
df_Illumina_2016$BID[is.na(df_Illumina_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Illumina_2016$ASK, na.rm = TRUE)
df_Illumina_2016$ASK[is.na(df_Illumina_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Illumina_2016$OPENPRC, na.rm = TRUE)
df_Illumina_2016$OPENPRC[is.na(df_Illumina_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Illumina_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Illumina_2016$text <- clean_tweets(df_Illumina_2016$text)
df_Illumina_2016$text <- lemmatize_strings(df_Illumina_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Illumina_2018 <- df_Illumina_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Illumina_2018

# Fill missing tickers with "ILMN".
df_Illumina_2018$TICKER[is.na(df_Illumina_2018$TICKER)] <- "ILMN"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Illumina_2018$PRC, na.rm = TRUE)
df_Illumina_2018$PRC[is.na(df_Illumina_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Illumina_2018$VOL, na.rm = TRUE)
df_Illumina_2018$VOL[is.na(df_Illumina_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Illumina_2018$BID, na.rm = TRUE)
df_Illumina_2018$BID[is.na(df_Illumina_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Illumina_2018$ASK, na.rm = TRUE)
df_Illumina_2018$ASK[is.na(df_Illumina_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Illumina_2018$OPENPRC, na.rm = TRUE)
df_Illumina_2018$OPENPRC[is.na(df_Illumina_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Illumina_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Illumina_2018$text <- clean_tweets(df_Illumina_2018$text)
df_Illumina_2018$text <- lemmatize_strings(df_Illumina_2018$text)
```


```{r}
```

```{r}
# Homedepot
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Homedepot_2013 <- df_Homedepot_2013[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Homedepot_2013

# Fill missing tickers with "HD".
df_Homedepot_2013$TICKER[is.na(df_Homedepot_2013$TICKER)] <- "HD"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Homedepot_2013$PRC, na.rm = TRUE)
df_Homedepot_2013$PRC[is.na(df_Homedepot_2013$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Homedepot_2013$VOL, na.rm = TRUE)
df_Homedepot_2013$VOL[is.na(df_Homedepot_2013$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Homedepot_2013$BID, na.rm = TRUE)
df_Homedepot_2013$BID[is.na(df_Homedepot_2013$BID)] <- BID_AVG

ASK_AVG <- mean(df_Homedepot_2013$ASK, na.rm = TRUE)
df_Homedepot_2013$ASK[is.na(df_Homedepot_2013$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Homedepot_2013$OPENPRC, na.rm = TRUE)
df_Homedepot_2013$OPENPRC[is.na(df_Homedepot_2013$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Homedepot_2013
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Homedepot_2013$text <- clean_tweets(df_Homedepot_2013$text)
df_Homedepot_2013$text <- lemmatize_strings(df_Homedepot_2013$text)
```

```{r}
# Southwest Airlines
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Southwest_2016 <- df_Southwest_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Southwest_2016

# Fill missing tickers with "LUV".
df_Southwest_2016$TICKER[is.na(df_Southwest_2016$TICKER)] <- "LUV"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Southwest_2016$PRC, na.rm = TRUE)
df_Southwest_2016$PRC[is.na(df_Southwest_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Southwest_2016$VOL, na.rm = TRUE)
df_Southwest_2016$VOL[is.na(df_Southwest_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Southwest_2016$BID, na.rm = TRUE)
df_Southwest_2016$BID[is.na(df_Southwest_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Southwest_2016$ASK, na.rm = TRUE)
df_Southwest_2016$ASK[is.na(df_Southwest_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Southwest_2016$OPENPRC, na.rm = TRUE)
df_Southwest_2016$OPENPRC[is.na(df_Southwest_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Southwest_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Southwest_2016$text <- clean_tweets(df_Southwest_2016$text)
df_Southwest_2016$text <- lemmatize_strings(df_Southwest_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Southwest_2018 <- df_Southwest_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Southwest_2018

# Fill missing tickers with "LUV".
df_Southwest_2018$TICKER[is.na(df_Southwest_2018$TICKER)] <- "LUV"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Southwest_2018$PRC, na.rm = TRUE)
df_Southwest_2018$PRC[is.na(df_Southwest_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Southwest_2018$VOL, na.rm = TRUE)
df_Southwest_2018$VOL[is.na(df_Southwest_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Southwest_2018$BID, na.rm = TRUE)
df_Southwest_2018$BID[is.na(df_Southwest_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_Southwest_2018$ASK, na.rm = TRUE)
df_Southwest_2018$ASK[is.na(df_Southwest_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Southwest_2018$OPENPRC, na.rm = TRUE)
df_Southwest_2018$OPENPRC[is.na(df_Southwest_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Southwest_2018
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Southwest_2018$text <- clean_tweets(df_Southwest_2018$text)
df_Southwest_2018$text <- lemmatize_strings(df_Southwest_2018$text)
```

```{r}
# FIS
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_FIS_2016 <- df_FIS_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_FIS_2016

# Fill missing tickers with "FIS".
df_FIS_2016$TICKER[is.na(df_FIS_2016$TICKER)] <- "FIS"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_FIS_2016$PRC, na.rm = TRUE)
df_FIS_2016$PRC[is.na(df_FIS_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_FIS_2016$VOL, na.rm = TRUE)
df_FIS_2016$VOL[is.na(df_FIS_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_FIS_2016$BID, na.rm = TRUE)
df_FIS_2016$BID[is.na(df_FIS_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_FIS_2016$ASK, na.rm = TRUE)
df_FIS_2016$ASK[is.na(df_FIS_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_FIS_2016$OPENPRC, na.rm = TRUE)
df_FIS_2016$OPENPRC[is.na(df_FIS_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_FIS_2016
```
```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_FIS_2016$text <- clean_tweets(df_FIS_2016$text)
df_FIS_2016$text <- lemmatize_strings(df_FIS_2016$text)
```

```{r}
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_FIS_2018 <- df_FIS_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_FIS_2018

# Fill missing tickers with "FIS".
df_FIS_2018$TICKER[is.na(df_FIS_2018$TICKER)] <- "FIS"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_FIS_2018$PRC, na.rm = TRUE)
df_FIS_2018$PRC[is.na(df_FIS_2018$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_FIS_2018$VOL, na.rm = TRUE)
df_FIS_2018$VOL[is.na(df_FIS_2018$VOL)] <- VOL_AVG

BID_AVG <- mean(df_FIS_2018$BID, na.rm = TRUE)
df_FIS_2018$BID[is.na(df_FIS_2018$BID)] <- BID_AVG

ASK_AVG <- mean(df_FIS_2018$ASK, na.rm = TRUE)
df_FIS_2018$ASK[is.na(df_FIS_2018$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_FIS_2018$OPENPRC, na.rm = TRUE)
df_FIS_2018$OPENPRC[is.na(df_FIS_2018$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_FIS_2018
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_FIS_2018$text <- clean_tweets(df_FIS_2018$text)
df_FIS_2018$text <- lemmatize_strings(df_FIS_2018$text)
```


```{r}
```
```{r}
# Leucadia nation
# Drop unused columns by position, then display the trimmed frame.
# NOTE(review): positional drop that overwrites the frame in place --
# re-running this chunk would strip a different set of columns.
df_Leucadia_2016 <- df_Leucadia_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Leucadia_2016

# Fill missing tickers with "LUK".
df_Leucadia_2016$TICKER[is.na(df_Leucadia_2016$TICKER)] <- "LUK"

# Mean-impute the numeric columns: each NA takes the mean of the column's
# observed (non-NA) values.
PRC_AVG <- mean(df_Leucadia_2016$PRC, na.rm = TRUE)
df_Leucadia_2016$PRC[is.na(df_Leucadia_2016$PRC)] <- PRC_AVG

VOL_AVG <- mean(df_Leucadia_2016$VOL, na.rm = TRUE)
df_Leucadia_2016$VOL[is.na(df_Leucadia_2016$VOL)] <- VOL_AVG

BID_AVG <- mean(df_Leucadia_2016$BID, na.rm = TRUE)
df_Leucadia_2016$BID[is.na(df_Leucadia_2016$BID)] <- BID_AVG

ASK_AVG <- mean(df_Leucadia_2016$ASK, na.rm = TRUE)
df_Leucadia_2016$ASK[is.na(df_Leucadia_2016$ASK)] <- ASK_AVG

OPEN_AVG <- mean(df_Leucadia_2016$OPENPRC, na.rm = TRUE)
df_Leucadia_2016$OPENPRC[is.na(df_Leucadia_2016$OPENPRC)] <- OPEN_AVG

# Display the imputed frame.
df_Leucadia_2016
```


```{r}
# Clean, then lemmatize. lemmatize_strings() splits each tweet into words
# first; lemmatize_words() expects one word per element, so it would not
# lemmatize whole tweets.
df_Leucadia_2016$text <- clean_tweets(df_Leucadia_2016$text)
df_Leucadia_2016$text <- lemmatize_strings(df_Leucadia_2016$text)
```


```{r}
```
```{r}
# df_Leucadia_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Leucadia_2018 <- df_Leucadia_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Leucadia_2018

# Rows with a missing ticker get the symbol below.
df_Leucadia_2018$TICKER[is.na(df_Leucadia_2018$TICKER)] <- "LUK"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Leucadia_2018$PRC, na.rm = TRUE)
df_Leucadia_2018$PRC[is.na(df_Leucadia_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Leucadia_2018$VOL, na.rm = TRUE)
df_Leucadia_2018$VOL[is.na(df_Leucadia_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Leucadia_2018$BID, na.rm = TRUE)
df_Leucadia_2018$BID[is.na(df_Leucadia_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Leucadia_2018$ASK, na.rm = TRUE)
df_Leucadia_2018$ASK[is.na(df_Leucadia_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Leucadia_2018$OPENPRC, na.rm = TRUE)
df_Leucadia_2018$OPENPRC[is.na(df_Leucadia_2018$OPENPRC)] <- OPEN_AVG

df_Leucadia_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Leucadia_2018[["text"]] <- clean_tweets(df_Leucadia_2018[["text"]])
df_Leucadia_2018[["text"]] <- lemmatize_words(df_Leucadia_2018[["text"]])
```


```{r}
```
```{r}
# Verizon (df_Verizon_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Verizon_2018 <- df_Verizon_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Verizon_2018

# Rows with a missing ticker get the symbol below.
df_Verizon_2018$TICKER[is.na(df_Verizon_2018$TICKER)] <- "VZ"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Verizon_2018$PRC, na.rm = TRUE)
df_Verizon_2018$PRC[is.na(df_Verizon_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Verizon_2018$VOL, na.rm = TRUE)
df_Verizon_2018$VOL[is.na(df_Verizon_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Verizon_2018$BID, na.rm = TRUE)
df_Verizon_2018$BID[is.na(df_Verizon_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Verizon_2018$ASK, na.rm = TRUE)
df_Verizon_2018$ASK[is.na(df_Verizon_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Verizon_2018$OPENPRC, na.rm = TRUE)
df_Verizon_2018$OPENPRC[is.na(df_Verizon_2018$OPENPRC)] <- OPEN_AVG

df_Verizon_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Verizon_2018[["text"]] <- clean_tweets(df_Verizon_2018[["text"]])
df_Verizon_2018[["text"]] <- lemmatize_words(df_Verizon_2018[["text"]])
```


```{r}
```
```{r}
# Western Union (df_WU_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_WU_2016 <- df_WU_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_WU_2016

# Rows with a missing ticker get the symbol below.
df_WU_2016$TICKER[is.na(df_WU_2016$TICKER)] <- "WU"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_WU_2016$PRC, na.rm = TRUE)
df_WU_2016$PRC[is.na(df_WU_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_WU_2016$VOL, na.rm = TRUE)
df_WU_2016$VOL[is.na(df_WU_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_WU_2016$BID, na.rm = TRUE)
df_WU_2016$BID[is.na(df_WU_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_WU_2016$ASK, na.rm = TRUE)
df_WU_2016$ASK[is.na(df_WU_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_WU_2016$OPENPRC, na.rm = TRUE)
df_WU_2016$OPENPRC[is.na(df_WU_2016$OPENPRC)] <- OPEN_AVG

df_WU_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_WU_2016[["text"]] <- clean_tweets(df_WU_2016[["text"]])
df_WU_2016[["text"]] <- lemmatize_words(df_WU_2016[["text"]])
```


```{r}
```
```{r}
# df_WU_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_WU_2018 <- df_WU_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_WU_2018

# Rows with a missing ticker get the symbol below.
df_WU_2018$TICKER[is.na(df_WU_2018$TICKER)] <- "WU"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_WU_2018$PRC, na.rm = TRUE)
df_WU_2018$PRC[is.na(df_WU_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_WU_2018$VOL, na.rm = TRUE)
df_WU_2018$VOL[is.na(df_WU_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_WU_2018$BID, na.rm = TRUE)
df_WU_2018$BID[is.na(df_WU_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_WU_2018$ASK, na.rm = TRUE)
df_WU_2018$ASK[is.na(df_WU_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_WU_2018$OPENPRC, na.rm = TRUE)
df_WU_2018$OPENPRC[is.na(df_WU_2018$OPENPRC)] <- OPEN_AVG

df_WU_2018
```

```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_WU_2018[["text"]] <- clean_tweets(df_WU_2018[["text"]])
df_WU_2018[["text"]] <- lemmatize_words(df_WU_2018[["text"]])
```

```{r}
# Red Hat (df_RedHat_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_RedHat_2016 <- df_RedHat_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_RedHat_2016

# Rows with a missing ticker get the symbol below.
df_RedHat_2016$TICKER[is.na(df_RedHat_2016$TICKER)] <- "RHT"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_RedHat_2016$PRC, na.rm = TRUE)
df_RedHat_2016$PRC[is.na(df_RedHat_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_RedHat_2016$VOL, na.rm = TRUE)
df_RedHat_2016$VOL[is.na(df_RedHat_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_RedHat_2016$BID, na.rm = TRUE)
df_RedHat_2016$BID[is.na(df_RedHat_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_RedHat_2016$ASK, na.rm = TRUE)
df_RedHat_2016$ASK[is.na(df_RedHat_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_RedHat_2016$OPENPRC, na.rm = TRUE)
df_RedHat_2016$OPENPRC[is.na(df_RedHat_2016$OPENPRC)] <- OPEN_AVG

df_RedHat_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_RedHat_2016[["text"]] <- clean_tweets(df_RedHat_2016[["text"]])
df_RedHat_2016[["text"]] <- lemmatize_words(df_RedHat_2016[["text"]])
```


```{r}
```
```{r}
# df_RedHat_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_RedHat_2018 <- df_RedHat_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_RedHat_2018

# Rows with a missing ticker get the symbol below.
df_RedHat_2018$TICKER[is.na(df_RedHat_2018$TICKER)] <- "RHT"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_RedHat_2018$PRC, na.rm = TRUE)
df_RedHat_2018$PRC[is.na(df_RedHat_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_RedHat_2018$VOL, na.rm = TRUE)
df_RedHat_2018$VOL[is.na(df_RedHat_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_RedHat_2018$BID, na.rm = TRUE)
df_RedHat_2018$BID[is.na(df_RedHat_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_RedHat_2018$ASK, na.rm = TRUE)
df_RedHat_2018$ASK[is.na(df_RedHat_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_RedHat_2018$OPENPRC, na.rm = TRUE)
df_RedHat_2018$OPENPRC[is.na(df_RedHat_2018$OPENPRC)] <- OPEN_AVG

df_RedHat_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_RedHat_2018[["text"]] <- clean_tweets(df_RedHat_2018[["text"]])
df_RedHat_2018[["text"]] <- lemmatize_words(df_RedHat_2018[["text"]])
```


```{r}
```
```{r}
# Amazon (df_AMZN_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_AMZN_2016 <- df_AMZN_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AMZN_2016

# Rows with a missing ticker get the symbol below.
df_AMZN_2016$TICKER[is.na(df_AMZN_2016$TICKER)] <- "AMZN"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_AMZN_2016$PRC, na.rm = TRUE)
df_AMZN_2016$PRC[is.na(df_AMZN_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_AMZN_2016$VOL, na.rm = TRUE)
df_AMZN_2016$VOL[is.na(df_AMZN_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_AMZN_2016$BID, na.rm = TRUE)
df_AMZN_2016$BID[is.na(df_AMZN_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_AMZN_2016$ASK, na.rm = TRUE)
df_AMZN_2016$ASK[is.na(df_AMZN_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_AMZN_2016$OPENPRC, na.rm = TRUE)
df_AMZN_2016$OPENPRC[is.na(df_AMZN_2016$OPENPRC)] <- OPEN_AVG

df_AMZN_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_AMZN_2016[["text"]] <- clean_tweets(df_AMZN_2016[["text"]])
df_AMZN_2016[["text"]] <- lemmatize_words(df_AMZN_2016[["text"]])
```

```{r}
# df_AMZN_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_AMZN_2018 <- df_AMZN_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AMZN_2018

# Rows with a missing ticker get the symbol below.
df_AMZN_2018$TICKER[is.na(df_AMZN_2018$TICKER)] <- "AMZN"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_AMZN_2018$PRC, na.rm = TRUE)
df_AMZN_2018$PRC[is.na(df_AMZN_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_AMZN_2018$VOL, na.rm = TRUE)
df_AMZN_2018$VOL[is.na(df_AMZN_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_AMZN_2018$BID, na.rm = TRUE)
df_AMZN_2018$BID[is.na(df_AMZN_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_AMZN_2018$ASK, na.rm = TRUE)
df_AMZN_2018$ASK[is.na(df_AMZN_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_AMZN_2018$OPENPRC, na.rm = TRUE)
df_AMZN_2018$OPENPRC[is.na(df_AMZN_2018$OPENPRC)] <- OPEN_AVG

df_AMZN_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_AMZN_2018[["text"]] <- clean_tweets(df_AMZN_2018[["text"]])
df_AMZN_2018[["text"]] <- lemmatize_words(df_AMZN_2018[["text"]])

```

```{r}
# GE (df_GE_2016): drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_GE_2016 <- df_GE_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GE_2016

# Rows with a missing ticker get the symbol below.
df_GE_2016$TICKER[is.na(df_GE_2016$TICKER)] <- "GE"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_GE_2016$PRC, na.rm = TRUE)
df_GE_2016$PRC[is.na(df_GE_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_GE_2016$VOL, na.rm = TRUE)
df_GE_2016$VOL[is.na(df_GE_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_GE_2016$BID, na.rm = TRUE)
df_GE_2016$BID[is.na(df_GE_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_GE_2016$ASK, na.rm = TRUE)
df_GE_2016$ASK[is.na(df_GE_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_GE_2016$OPENPRC, na.rm = TRUE)
df_GE_2016$OPENPRC[is.na(df_GE_2016$OPENPRC)] <- OPEN_AVG

df_GE_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_GE_2016[["text"]] <- clean_tweets(df_GE_2016[["text"]])
df_GE_2016[["text"]] <- lemmatize_words(df_GE_2016[["text"]])
```


```{r}
```
```{r}
# df_GE_2017: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_GE_2017 <- df_GE_2017[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GE_2017

# Rows with a missing ticker get the symbol below.
df_GE_2017$TICKER[is.na(df_GE_2017$TICKER)] <- "GE"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_GE_2017$PRC, na.rm = TRUE)
df_GE_2017$PRC[is.na(df_GE_2017$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_GE_2017$VOL, na.rm = TRUE)
df_GE_2017$VOL[is.na(df_GE_2017$VOL)] <- VOL_AVG
BID_AVG <- mean(df_GE_2017$BID, na.rm = TRUE)
df_GE_2017$BID[is.na(df_GE_2017$BID)] <- BID_AVG
ASK_AVG <- mean(df_GE_2017$ASK, na.rm = TRUE)
df_GE_2017$ASK[is.na(df_GE_2017$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_GE_2017$OPENPRC, na.rm = TRUE)
df_GE_2017$OPENPRC[is.na(df_GE_2017$OPENPRC)] <- OPEN_AVG

df_GE_2017
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_GE_2017[["text"]] <- clean_tweets(df_GE_2017[["text"]])
df_GE_2017[["text"]] <- lemmatize_words(df_GE_2017[["text"]])
```

```{r}
# Fiserv (df_Fiserv_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Fiserv_2016 <- df_Fiserv_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Fiserv_2016

# Rows with a missing ticker get the symbol below.
df_Fiserv_2016$TICKER[is.na(df_Fiserv_2016$TICKER)] <- "FISV"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Fiserv_2016$PRC, na.rm = TRUE)
df_Fiserv_2016$PRC[is.na(df_Fiserv_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Fiserv_2016$VOL, na.rm = TRUE)
df_Fiserv_2016$VOL[is.na(df_Fiserv_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Fiserv_2016$BID, na.rm = TRUE)
df_Fiserv_2016$BID[is.na(df_Fiserv_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Fiserv_2016$ASK, na.rm = TRUE)
df_Fiserv_2016$ASK[is.na(df_Fiserv_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Fiserv_2016$OPENPRC, na.rm = TRUE)
df_Fiserv_2016$OPENPRC[is.na(df_Fiserv_2016$OPENPRC)] <- OPEN_AVG

df_Fiserv_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Fiserv_2016[["text"]] <- clean_tweets(df_Fiserv_2016[["text"]])
df_Fiserv_2016[["text"]] <- lemmatize_words(df_Fiserv_2016[["text"]])
```

```{r}
# Fiserv (df_Fiserv_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Fiserv_2018 <- df_Fiserv_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Fiserv_2018

# Rows with a missing ticker get the symbol below.
df_Fiserv_2018$TICKER[is.na(df_Fiserv_2018$TICKER)] <- "FISV"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Fiserv_2018$PRC, na.rm = TRUE)
df_Fiserv_2018$PRC[is.na(df_Fiserv_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Fiserv_2018$VOL, na.rm = TRUE)
df_Fiserv_2018$VOL[is.na(df_Fiserv_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Fiserv_2018$BID, na.rm = TRUE)
df_Fiserv_2018$BID[is.na(df_Fiserv_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Fiserv_2018$ASK, na.rm = TRUE)
df_Fiserv_2018$ASK[is.na(df_Fiserv_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Fiserv_2018$OPENPRC, na.rm = TRUE)
df_Fiserv_2018$OPENPRC[is.na(df_Fiserv_2018$OPENPRC)] <- OPEN_AVG

df_Fiserv_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Fiserv_2018[["text"]] <- clean_tweets(df_Fiserv_2018[["text"]])
df_Fiserv_2018[["text"]] <- lemmatize_words(df_Fiserv_2018[["text"]])
```

```{r}
# Waste Management (df_WM_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_WM_2018 <- df_WM_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_WM_2018

# Rows with a missing ticker get the symbol below.
df_WM_2018$TICKER[is.na(df_WM_2018$TICKER)] <- "WM"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_WM_2018$PRC, na.rm = TRUE)
df_WM_2018$PRC[is.na(df_WM_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_WM_2018$VOL, na.rm = TRUE)
df_WM_2018$VOL[is.na(df_WM_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_WM_2018$BID, na.rm = TRUE)
df_WM_2018$BID[is.na(df_WM_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_WM_2018$ASK, na.rm = TRUE)
df_WM_2018$ASK[is.na(df_WM_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_WM_2018$OPENPRC, na.rm = TRUE)
df_WM_2018$OPENPRC[is.na(df_WM_2018$OPENPRC)] <- OPEN_AVG

df_WM_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_WM_2018[["text"]] <- clean_tweets(df_WM_2018[["text"]])
df_WM_2018[["text"]] <- lemmatize_words(df_WM_2018[["text"]])
```

```{r}
# Willis Towers Watson (df_Wills_2016): drop unused columns by position, then
# fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Wills_2016 <- df_Wills_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Wills_2016

# Rows with a missing ticker get the symbol below.
df_Wills_2016$TICKER[is.na(df_Wills_2016$TICKER)] <- "WLTW"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Wills_2016$PRC, na.rm = TRUE)
df_Wills_2016$PRC[is.na(df_Wills_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Wills_2016$VOL, na.rm = TRUE)
df_Wills_2016$VOL[is.na(df_Wills_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Wills_2016$BID, na.rm = TRUE)
df_Wills_2016$BID[is.na(df_Wills_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Wills_2016$ASK, na.rm = TRUE)
df_Wills_2016$ASK[is.na(df_Wills_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Wills_2016$OPENPRC, na.rm = TRUE)
df_Wills_2016$OPENPRC[is.na(df_Wills_2016$OPENPRC)] <- OPEN_AVG

df_Wills_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Wills_2016[["text"]] <- clean_tweets(df_Wills_2016[["text"]])
df_Wills_2016[["text"]] <- lemmatize_words(df_Wills_2016[["text"]])
```

```{r}
# df_Wills_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Wills_2018 <- df_Wills_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Wills_2018

# Rows with a missing ticker get the symbol below.
df_Wills_2018$TICKER[is.na(df_Wills_2018$TICKER)] <- "WLTW"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Wills_2018$PRC, na.rm = TRUE)
df_Wills_2018$PRC[is.na(df_Wills_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Wills_2018$VOL, na.rm = TRUE)
df_Wills_2018$VOL[is.na(df_Wills_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Wills_2018$BID, na.rm = TRUE)
df_Wills_2018$BID[is.na(df_Wills_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Wills_2018$ASK, na.rm = TRUE)
df_Wills_2018$ASK[is.na(df_Wills_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Wills_2018$OPENPRC, na.rm = TRUE)
df_Wills_2018$OPENPRC[is.na(df_Wills_2018$OPENPRC)] <- OPEN_AVG

df_Wills_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Wills_2018[["text"]] <- clean_tweets(df_Wills_2018[["text"]])
df_Wills_2018[["text"]] <- lemmatize_words(df_Wills_2018[["text"]])
```


```{r}
```
```{r}
# Tripadvisor (df_tripadvisor_2016): drop unused columns by position, then
# fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_tripadvisor_2016 <- df_tripadvisor_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_tripadvisor_2016

# Rows with a missing ticker get the symbol below.
df_tripadvisor_2016$TICKER[is.na(df_tripadvisor_2016$TICKER)] <- "TRIP"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_tripadvisor_2016$PRC, na.rm = TRUE)
df_tripadvisor_2016$PRC[is.na(df_tripadvisor_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_tripadvisor_2016$VOL, na.rm = TRUE)
df_tripadvisor_2016$VOL[is.na(df_tripadvisor_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_tripadvisor_2016$BID, na.rm = TRUE)
df_tripadvisor_2016$BID[is.na(df_tripadvisor_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_tripadvisor_2016$ASK, na.rm = TRUE)
df_tripadvisor_2016$ASK[is.na(df_tripadvisor_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_tripadvisor_2016$OPENPRC, na.rm = TRUE)
df_tripadvisor_2016$OPENPRC[is.na(df_tripadvisor_2016$OPENPRC)] <- OPEN_AVG

df_tripadvisor_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_tripadvisor_2016[["text"]] <- clean_tweets(df_tripadvisor_2016[["text"]])
df_tripadvisor_2016[["text"]] <- lemmatize_words(df_tripadvisor_2016[["text"]])
```

```{r}
# df_tripadvisor_2018: drop unused columns by position, then fill missing
# values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_tripadvisor_2018 <- df_tripadvisor_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_tripadvisor_2018

# Rows with a missing ticker get the symbol below.
df_tripadvisor_2018$TICKER[is.na(df_tripadvisor_2018$TICKER)] <- "TRIP"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_tripadvisor_2018$PRC, na.rm = TRUE)
df_tripadvisor_2018$PRC[is.na(df_tripadvisor_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_tripadvisor_2018$VOL, na.rm = TRUE)
df_tripadvisor_2018$VOL[is.na(df_tripadvisor_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_tripadvisor_2018$BID, na.rm = TRUE)
df_tripadvisor_2018$BID[is.na(df_tripadvisor_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_tripadvisor_2018$ASK, na.rm = TRUE)
df_tripadvisor_2018$ASK[is.na(df_tripadvisor_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_tripadvisor_2018$OPENPRC, na.rm = TRUE)
df_tripadvisor_2018$OPENPRC[is.na(df_tripadvisor_2018$OPENPRC)] <- OPEN_AVG

df_tripadvisor_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_tripadvisor_2018[["text"]] <- clean_tweets(df_tripadvisor_2018[["text"]])
df_tripadvisor_2018[["text"]] <- lemmatize_words(df_tripadvisor_2018[["text"]])
```


```{r}
```
```{r}
# DavitaKent (df_DavitaK_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_DavitaK_2016 <- df_DavitaK_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_DavitaK_2016

# Rows with a missing ticker get the symbol below.
df_DavitaK_2016$TICKER[is.na(df_DavitaK_2016$TICKER)] <- "DVA"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_DavitaK_2016$PRC, na.rm = TRUE)
df_DavitaK_2016$PRC[is.na(df_DavitaK_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_DavitaK_2016$VOL, na.rm = TRUE)
df_DavitaK_2016$VOL[is.na(df_DavitaK_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_DavitaK_2016$BID, na.rm = TRUE)
df_DavitaK_2016$BID[is.na(df_DavitaK_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_DavitaK_2016$ASK, na.rm = TRUE)
df_DavitaK_2016$ASK[is.na(df_DavitaK_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_DavitaK_2016$OPENPRC, na.rm = TRUE)
df_DavitaK_2016$OPENPRC[is.na(df_DavitaK_2016$OPENPRC)] <- OPEN_AVG

df_DavitaK_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_DavitaK_2016[["text"]] <- clean_tweets(df_DavitaK_2016[["text"]])
df_DavitaK_2016[["text"]] <- lemmatize_words(df_DavitaK_2016[["text"]])
```


```{r}
```
```{r}
# df_DavitaK_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_DavitaK_2018 <- df_DavitaK_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_DavitaK_2018

# Rows with a missing ticker get the symbol below.
df_DavitaK_2018$TICKER[is.na(df_DavitaK_2018$TICKER)] <- "DVA"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_DavitaK_2018$PRC, na.rm = TRUE)
df_DavitaK_2018$PRC[is.na(df_DavitaK_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_DavitaK_2018$VOL, na.rm = TRUE)
df_DavitaK_2018$VOL[is.na(df_DavitaK_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_DavitaK_2018$BID, na.rm = TRUE)
df_DavitaK_2018$BID[is.na(df_DavitaK_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_DavitaK_2018$ASK, na.rm = TRUE)
df_DavitaK_2018$ASK[is.na(df_DavitaK_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_DavitaK_2018$OPENPRC, na.rm = TRUE)
df_DavitaK_2018$OPENPRC[is.na(df_DavitaK_2018$OPENPRC)] <- OPEN_AVG

df_DavitaK_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_DavitaK_2018[["text"]] <- clean_tweets(df_DavitaK_2018[["text"]])
df_DavitaK_2018[["text"]] <- lemmatize_words(df_DavitaK_2018[["text"]])
```

```{r}
# Starbucks (df_Starbucks_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Starbucks_2018 <- df_Starbucks_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Starbucks_2018

# Rows with a missing ticker get the symbol below.
df_Starbucks_2018$TICKER[is.na(df_Starbucks_2018$TICKER)] <- "SBUX"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Starbucks_2018$PRC, na.rm = TRUE)
df_Starbucks_2018$PRC[is.na(df_Starbucks_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Starbucks_2018$VOL, na.rm = TRUE)
df_Starbucks_2018$VOL[is.na(df_Starbucks_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Starbucks_2018$BID, na.rm = TRUE)
df_Starbucks_2018$BID[is.na(df_Starbucks_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Starbucks_2018$ASK, na.rm = TRUE)
df_Starbucks_2018$ASK[is.na(df_Starbucks_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Starbucks_2018$OPENPRC, na.rm = TRUE)
df_Starbucks_2018$OPENPRC[is.na(df_Starbucks_2018$OPENPRC)] <- OPEN_AVG

df_Starbucks_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Starbucks_2018[["text"]] <- clean_tweets(df_Starbucks_2018[["text"]])
df_Starbucks_2018[["text"]] <- lemmatize_words(df_Starbucks_2018[["text"]])
```


```{r}
```
```{r}
# McCormick (df_McCormick_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_McCormick_2018 <- df_McCormick_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_McCormick_2018

# Rows with a missing ticker get the symbol below.
df_McCormick_2018$TICKER[is.na(df_McCormick_2018$TICKER)] <- "MKC"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_McCormick_2018$PRC, na.rm = TRUE)
df_McCormick_2018$PRC[is.na(df_McCormick_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_McCormick_2018$VOL, na.rm = TRUE)
df_McCormick_2018$VOL[is.na(df_McCormick_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_McCormick_2018$BID, na.rm = TRUE)
df_McCormick_2018$BID[is.na(df_McCormick_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_McCormick_2018$ASK, na.rm = TRUE)
df_McCormick_2018$ASK[is.na(df_McCormick_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_McCormick_2018$OPENPRC, na.rm = TRUE)
df_McCormick_2018$OPENPRC[is.na(df_McCormick_2018$OPENPRC)] <- OPEN_AVG

df_McCormick_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_McCormick_2018[["text"]] <- clean_tweets(df_McCormick_2018[["text"]])
df_McCormick_2018[["text"]] <- lemmatize_words(df_McCormick_2018[["text"]])
```

```{r}
# IHS Markit (df_IHS_2018): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_IHS_2018 <- df_IHS_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_IHS_2018

# Rows with a missing ticker get the symbol below.
# NOTE(review): "TCX" does not look like IHS Markit's symbol (it traded as
# INFO; TCX is Tucows) - confirm against the non-NA TICKER values in this frame.
df_IHS_2018$TICKER[is.na(df_IHS_2018$TICKER)] <- "TCX"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_IHS_2018$PRC, na.rm = TRUE)
df_IHS_2018$PRC[is.na(df_IHS_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_IHS_2018$VOL, na.rm = TRUE)
df_IHS_2018$VOL[is.na(df_IHS_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_IHS_2018$BID, na.rm = TRUE)
df_IHS_2018$BID[is.na(df_IHS_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_IHS_2018$ASK, na.rm = TRUE)
df_IHS_2018$ASK[is.na(df_IHS_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_IHS_2018$OPENPRC, na.rm = TRUE)
df_IHS_2018$OPENPRC[is.na(df_IHS_2018$OPENPRC)] <- OPEN_AVG

df_IHS_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_IHS_2018[["text"]] <- clean_tweets(df_IHS_2018[["text"]])
df_IHS_2018[["text"]] <- lemmatize_words(df_IHS_2018[["text"]])
```

```{r}
# AMD (df_AMD_2018): drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_AMD_2018 <- df_AMD_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_AMD_2018

# Rows with a missing ticker get the symbol below.
df_AMD_2018$TICKER[is.na(df_AMD_2018$TICKER)] <- "AMD"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_AMD_2018$PRC, na.rm = TRUE)
df_AMD_2018$PRC[is.na(df_AMD_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_AMD_2018$VOL, na.rm = TRUE)
df_AMD_2018$VOL[is.na(df_AMD_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_AMD_2018$BID, na.rm = TRUE)
df_AMD_2018$BID[is.na(df_AMD_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_AMD_2018$ASK, na.rm = TRUE)
df_AMD_2018$ASK[is.na(df_AMD_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_AMD_2018$OPENPRC, na.rm = TRUE)
df_AMD_2018$OPENPRC[is.na(df_AMD_2018$OPENPRC)] <- OPEN_AVG

df_AMD_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_AMD_2018[["text"]] <- clean_tweets(df_AMD_2018[["text"]])
df_AMD_2018[["text"]] <- lemmatize_words(df_AMD_2018[["text"]])
```


```{r}
```
```{r}
# ResMed (df_ResMed_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_ResMed_2016 <- df_ResMed_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_ResMed_2016

# Rows with a missing ticker get the symbol below.
df_ResMed_2016$TICKER[is.na(df_ResMed_2016$TICKER)] <- "RMD"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_ResMed_2016$PRC, na.rm = TRUE)
df_ResMed_2016$PRC[is.na(df_ResMed_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_ResMed_2016$VOL, na.rm = TRUE)
df_ResMed_2016$VOL[is.na(df_ResMed_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_ResMed_2016$BID, na.rm = TRUE)
df_ResMed_2016$BID[is.na(df_ResMed_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_ResMed_2016$ASK, na.rm = TRUE)
df_ResMed_2016$ASK[is.na(df_ResMed_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_ResMed_2016$OPENPRC, na.rm = TRUE)
df_ResMed_2016$OPENPRC[is.na(df_ResMed_2016$OPENPRC)] <- OPEN_AVG

df_ResMed_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_ResMed_2016[["text"]] <- clean_tweets(df_ResMed_2016[["text"]])
df_ResMed_2016[["text"]] <- lemmatize_words(df_ResMed_2016[["text"]])
```


```{r}
```
```{r}
# df_ResMed_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_ResMed_2018 <- df_ResMed_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_ResMed_2018

# Rows with a missing ticker get the symbol below.
df_ResMed_2018$TICKER[is.na(df_ResMed_2018$TICKER)] <- "RMD"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_ResMed_2018$PRC, na.rm = TRUE)
df_ResMed_2018$PRC[is.na(df_ResMed_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_ResMed_2018$VOL, na.rm = TRUE)
df_ResMed_2018$VOL[is.na(df_ResMed_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_ResMed_2018$BID, na.rm = TRUE)
df_ResMed_2018$BID[is.na(df_ResMed_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_ResMed_2018$ASK, na.rm = TRUE)
df_ResMed_2018$ASK[is.na(df_ResMed_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_ResMed_2018$OPENPRC, na.rm = TRUE)
df_ResMed_2018$OPENPRC[is.na(df_ResMed_2018$OPENPRC)] <- OPEN_AVG

df_ResMed_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_ResMed_2018[["text"]] <- clean_tweets(df_ResMed_2018[["text"]])
df_ResMed_2018[["text"]] <- lemmatize_words(df_ResMed_2018[["text"]])
```

```{r}
# CA (df_CA_2016): drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_CA_2016 <- df_CA_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_CA_2016

# Rows with a missing ticker get the symbol below.
df_CA_2016$TICKER[is.na(df_CA_2016$TICKER)] <- "CA"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_CA_2016$PRC, na.rm = TRUE)
df_CA_2016$PRC[is.na(df_CA_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_CA_2016$VOL, na.rm = TRUE)
df_CA_2016$VOL[is.na(df_CA_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_CA_2016$BID, na.rm = TRUE)
df_CA_2016$BID[is.na(df_CA_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_CA_2016$ASK, na.rm = TRUE)
df_CA_2016$ASK[is.na(df_CA_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_CA_2016$OPENPRC, na.rm = TRUE)
df_CA_2016$OPENPRC[is.na(df_CA_2016$OPENPRC)] <- OPEN_AVG

df_CA_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_CA_2016[["text"]] <- clean_tweets(df_CA_2016[["text"]])
df_CA_2016[["text"]] <- lemmatize_words(df_CA_2016[["text"]])
```


```{r}
# df_CA_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_CA_2018 <- df_CA_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_CA_2018

# Rows with a missing ticker get the symbol below.
df_CA_2018$TICKER[is.na(df_CA_2018$TICKER)] <- "CA"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_CA_2018$PRC, na.rm = TRUE)
df_CA_2018$PRC[is.na(df_CA_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_CA_2018$VOL, na.rm = TRUE)
df_CA_2018$VOL[is.na(df_CA_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_CA_2018$BID, na.rm = TRUE)
df_CA_2018$BID[is.na(df_CA_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_CA_2018$ASK, na.rm = TRUE)
df_CA_2018$ASK[is.na(df_CA_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_CA_2018$OPENPRC, na.rm = TRUE)
df_CA_2018$OPENPRC[is.na(df_CA_2018$OPENPRC)] <- OPEN_AVG

df_CA_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_CA_2018[["text"]] <- clean_tweets(df_CA_2018[["text"]])
df_CA_2018[["text"]] <- lemmatize_words(df_CA_2018[["text"]])
```


```{r}
```
```{r}
# GM (df_GM_2016): drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_GM_2016 <- df_GM_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GM_2016

# Rows with a missing ticker get the symbol below.
df_GM_2016$TICKER[is.na(df_GM_2016$TICKER)] <- "GM"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_GM_2016$PRC, na.rm = TRUE)
df_GM_2016$PRC[is.na(df_GM_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_GM_2016$VOL, na.rm = TRUE)
df_GM_2016$VOL[is.na(df_GM_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_GM_2016$BID, na.rm = TRUE)
df_GM_2016$BID[is.na(df_GM_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_GM_2016$ASK, na.rm = TRUE)
df_GM_2016$ASK[is.na(df_GM_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_GM_2016$OPENPRC, na.rm = TRUE)
df_GM_2016$OPENPRC[is.na(df_GM_2016$OPENPRC)] <- OPEN_AVG

df_GM_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_GM_2016[["text"]] <- clean_tweets(df_GM_2016[["text"]])
df_GM_2016[["text"]] <- lemmatize_words(df_GM_2016[["text"]])
```


```{r}
```
```{r}
# df_GM_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_GM_2018 <- df_GM_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_GM_2018

# Rows with a missing ticker get the symbol below.
df_GM_2018$TICKER[is.na(df_GM_2018$TICKER)] <- "GM"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_GM_2018$PRC, na.rm = TRUE)
df_GM_2018$PRC[is.na(df_GM_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_GM_2018$VOL, na.rm = TRUE)
df_GM_2018$VOL[is.na(df_GM_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_GM_2018$BID, na.rm = TRUE)
df_GM_2018$BID[is.na(df_GM_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_GM_2018$ASK, na.rm = TRUE)
df_GM_2018$ASK[is.na(df_GM_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_GM_2018$OPENPRC, na.rm = TRUE)
df_GM_2018$OPENPRC[is.na(df_GM_2018$OPENPRC)] <- OPEN_AVG

df_GM_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_GM_2018[["text"]] <- clean_tweets(df_GM_2018[["text"]])
df_GM_2018[["text"]] <- lemmatize_words(df_GM_2018[["text"]])
```
```{r}
# Aetna (df_Aetna_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Aetna_2016 <- df_Aetna_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Aetna_2016

# Rows with a missing ticker get the symbol below.
df_Aetna_2016$TICKER[is.na(df_Aetna_2016$TICKER)] <- "AET"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Aetna_2016$PRC, na.rm = TRUE)
df_Aetna_2016$PRC[is.na(df_Aetna_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Aetna_2016$VOL, na.rm = TRUE)
df_Aetna_2016$VOL[is.na(df_Aetna_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Aetna_2016$BID, na.rm = TRUE)
df_Aetna_2016$BID[is.na(df_Aetna_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Aetna_2016$ASK, na.rm = TRUE)
df_Aetna_2016$ASK[is.na(df_Aetna_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Aetna_2016$OPENPRC, na.rm = TRUE)
df_Aetna_2016$OPENPRC[is.na(df_Aetna_2016$OPENPRC)] <- OPEN_AVG

df_Aetna_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Aetna_2016[["text"]] <- clean_tweets(df_Aetna_2016[["text"]])
df_Aetna_2016[["text"]] <- lemmatize_words(df_Aetna_2016[["text"]])
```

```{r}
# df_Aetna_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Aetna_2018 <- df_Aetna_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Aetna_2018

# Rows with a missing ticker get the symbol below.
df_Aetna_2018$TICKER[is.na(df_Aetna_2018$TICKER)] <- "AET"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Aetna_2018$PRC, na.rm = TRUE)
df_Aetna_2018$PRC[is.na(df_Aetna_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Aetna_2018$VOL, na.rm = TRUE)
df_Aetna_2018$VOL[is.na(df_Aetna_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Aetna_2018$BID, na.rm = TRUE)
df_Aetna_2018$BID[is.na(df_Aetna_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Aetna_2018$ASK, na.rm = TRUE)
df_Aetna_2018$ASK[is.na(df_Aetna_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Aetna_2018$OPENPRC, na.rm = TRUE)
df_Aetna_2018$OPENPRC[is.na(df_Aetna_2018$OPENPRC)] <- OPEN_AVG

df_Aetna_2018
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Aetna_2018[["text"]] <- clean_tweets(df_Aetna_2018[["text"]])
df_Aetna_2018[["text"]] <- lemmatize_words(df_Aetna_2018[["text"]])
```

```{r}
# NRG (df_NRG_2016): drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_NRG_2016 <- df_NRG_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_NRG_2016

# Rows with a missing ticker get the symbol below.
df_NRG_2016$TICKER[is.na(df_NRG_2016$TICKER)] <- "NRG"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_NRG_2016$PRC, na.rm = TRUE)
df_NRG_2016$PRC[is.na(df_NRG_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_NRG_2016$VOL, na.rm = TRUE)
df_NRG_2016$VOL[is.na(df_NRG_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_NRG_2016$BID, na.rm = TRUE)
df_NRG_2016$BID[is.na(df_NRG_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_NRG_2016$ASK, na.rm = TRUE)
df_NRG_2016$ASK[is.na(df_NRG_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_NRG_2016$OPENPRC, na.rm = TRUE)
df_NRG_2016$OPENPRC[is.na(df_NRG_2016$OPENPRC)] <- OPEN_AVG

df_NRG_2016
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_NRG_2016[["text"]] <- clean_tweets(df_NRG_2016[["text"]])
df_NRG_2016[["text"]] <- lemmatize_words(df_NRG_2016[["text"]])
```


```{r}
```
```{r}
# df_NRG_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_NRG_2018 <- df_NRG_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_NRG_2018

# Rows with a missing ticker get the symbol below.
df_NRG_2018$TICKER[is.na(df_NRG_2018$TICKER)] <- "NRG"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_NRG_2018$PRC, na.rm = TRUE)
df_NRG_2018$PRC[is.na(df_NRG_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_NRG_2018$VOL, na.rm = TRUE)
df_NRG_2018$VOL[is.na(df_NRG_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_NRG_2018$BID, na.rm = TRUE)
df_NRG_2018$BID[is.na(df_NRG_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_NRG_2018$ASK, na.rm = TRUE)
df_NRG_2018$ASK[is.na(df_NRG_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_NRG_2018$OPENPRC, na.rm = TRUE)
df_NRG_2018$OPENPRC[is.na(df_NRG_2018$OPENPRC)] <- OPEN_AVG

df_NRG_2018
```


```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_NRG_2018[["text"]] <- clean_tweets(df_NRG_2018[["text"]])
df_NRG_2018[["text"]] <- lemmatize_words(df_NRG_2018[["text"]])
```


```{r}
```
```{r}
# Medtronic (df_Medtronic_2016): drop unused columns by position, then fill
# missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Medtronic_2016 <- df_Medtronic_2016[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Medtronic_2016

# Rows with a missing ticker get the symbol below.
df_Medtronic_2016$TICKER[is.na(df_Medtronic_2016$TICKER)] <- "MDT"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Medtronic_2016$PRC, na.rm = TRUE)
df_Medtronic_2016$PRC[is.na(df_Medtronic_2016$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Medtronic_2016$VOL, na.rm = TRUE)
df_Medtronic_2016$VOL[is.na(df_Medtronic_2016$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Medtronic_2016$BID, na.rm = TRUE)
df_Medtronic_2016$BID[is.na(df_Medtronic_2016$BID)] <- BID_AVG
ASK_AVG <- mean(df_Medtronic_2016$ASK, na.rm = TRUE)
df_Medtronic_2016$ASK[is.na(df_Medtronic_2016$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Medtronic_2016$OPENPRC, na.rm = TRUE)
df_Medtronic_2016$OPENPRC[is.na(df_Medtronic_2016$OPENPRC)] <- OPEN_AVG

df_Medtronic_2016
```
```{r}
# Run the tweet text through clean_tweets(), then lemmatize_words().
# NOTE(review): if lemmatize_words is textstem's, it lemmatizes single words;
# lemmatize_strings is the whole-sentence variant - confirm which is intended.
df_Medtronic_2016[["text"]] <- clean_tweets(df_Medtronic_2016[["text"]])
df_Medtronic_2016[["text"]] <- lemmatize_words(df_Medtronic_2016[["text"]])
```

```{r}
# df_Medtronic_2018: drop unused columns by position, then fill missing values.
# NOTE(review): the positional drop is not idempotent; re-running this chunk
# removes further columns from the already-trimmed frame.
df_Medtronic_2018 <- df_Medtronic_2018[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_Medtronic_2018

# Rows with a missing ticker get the symbol below.
df_Medtronic_2018$TICKER[is.na(df_Medtronic_2018$TICKER)] <- "MDT"

# Mean imputation: each NA becomes the mean of that column's observed values.
PRC_AVG <- mean(df_Medtronic_2018$PRC, na.rm = TRUE)
df_Medtronic_2018$PRC[is.na(df_Medtronic_2018$PRC)] <- PRC_AVG
VOL_AVG <- mean(df_Medtronic_2018$VOL, na.rm = TRUE)
df_Medtronic_2018$VOL[is.na(df_Medtronic_2018$VOL)] <- VOL_AVG
BID_AVG <- mean(df_Medtronic_2018$BID, na.rm = TRUE)
df_Medtronic_2018$BID[is.na(df_Medtronic_2018$BID)] <- BID_AVG
ASK_AVG <- mean(df_Medtronic_2018$ASK, na.rm = TRUE)
df_Medtronic_2018$ASK[is.na(df_Medtronic_2018$ASK)] <- ASK_AVG
OPEN_AVG <- mean(df_Medtronic_2018$OPENPRC, na.rm = TRUE)
df_Medtronic_2018$OPENPRC[is.na(df_Medtronic_2018$OPENPRC)] <- OPEN_AVG

df_Medtronic_2018
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Medtronic_2018$text <- lemmatize_words(clean_tweets(df_Medtronic_2018$text))
```

```{r}
# Juniper, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Juniper_2016 <- df_Juniper_2016[, -c(6, 8:10, 15, 17:21)]
df_Juniper_2016

# Fill missing TICKER entries with "JNPR".
df_Juniper_2016$TICKER[is.na(df_Juniper_2016$TICKER)] <- "JNPR"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Juniper_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Juniper_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Juniper_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Juniper_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Juniper_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Juniper_2016$PRC[is.na(df_Juniper_2016$PRC)] <- PRC_AVG
df_Juniper_2016$VOL[is.na(df_Juniper_2016$VOL)] <- VOL_AVG
df_Juniper_2016$BID[is.na(df_Juniper_2016$BID)] <- BID_AVG
df_Juniper_2016$ASK[is.na(df_Juniper_2016$ASK)] <- ASK_AVG
df_Juniper_2016$OPENPRC[is.na(df_Juniper_2016$OPENPRC)] <- OPEN_AVG

df_Juniper_2016
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Juniper_2016$text <- lemmatize_words(clean_tweets(df_Juniper_2016$text))
```

```{r}
# Juniper, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Juniper_2018 <- df_Juniper_2018[, -c(6, 8:10, 15, 17:21)]
df_Juniper_2018

# Fill missing TICKER entries with "JNPR".
df_Juniper_2018$TICKER[is.na(df_Juniper_2018$TICKER)] <- "JNPR"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Juniper_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Juniper_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Juniper_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Juniper_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Juniper_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Juniper_2018$PRC[is.na(df_Juniper_2018$PRC)] <- PRC_AVG
df_Juniper_2018$VOL[is.na(df_Juniper_2018$VOL)] <- VOL_AVG
df_Juniper_2018$BID[is.na(df_Juniper_2018$BID)] <- BID_AVG
df_Juniper_2018$ASK[is.na(df_Juniper_2018$ASK)] <- ASK_AVG
df_Juniper_2018$OPENPRC[is.na(df_Juniper_2018$OPENPRC)] <- OPEN_AVG

df_Juniper_2018
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Juniper_2018$text <- lemmatize_words(clean_tweets(df_Juniper_2018$text))
```

```{r}
# AetnaR, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_AetnaR_2018 <- df_AetnaR_2018[, -c(6, 8:10, 15, 17:21)]
df_AetnaR_2018

# Fill missing TICKER entries with "AET".
df_AetnaR_2018$TICKER[is.na(df_AetnaR_2018$TICKER)] <- "AET"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_AetnaR_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_AetnaR_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_AetnaR_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_AetnaR_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_AetnaR_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_AetnaR_2018$PRC[is.na(df_AetnaR_2018$PRC)] <- PRC_AVG
df_AetnaR_2018$VOL[is.na(df_AetnaR_2018$VOL)] <- VOL_AVG
df_AetnaR_2018$BID[is.na(df_AetnaR_2018$BID)] <- BID_AVG
df_AetnaR_2018$ASK[is.na(df_AetnaR_2018$ASK)] <- ASK_AVG
df_AetnaR_2018$OPENPRC[is.na(df_AetnaR_2018$OPENPRC)] <- OPEN_AVG

df_AetnaR_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_AetnaR_2018$text <- lemmatize_words(clean_tweets(df_AetnaR_2018$text))
```
```{r}
# Netflix, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Netflix_2016 <- df_Netflix_2016[, -c(6, 8:10, 15, 17:21)]
df_Netflix_2016

# Fill missing TICKER entries with "NFLX".
df_Netflix_2016$TICKER[is.na(df_Netflix_2016$TICKER)] <- "NFLX"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Netflix_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Netflix_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Netflix_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Netflix_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Netflix_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Netflix_2016$PRC[is.na(df_Netflix_2016$PRC)] <- PRC_AVG
df_Netflix_2016$VOL[is.na(df_Netflix_2016$VOL)] <- VOL_AVG
df_Netflix_2016$BID[is.na(df_Netflix_2016$BID)] <- BID_AVG
df_Netflix_2016$ASK[is.na(df_Netflix_2016$ASK)] <- ASK_AVG
df_Netflix_2016$OPENPRC[is.na(df_Netflix_2016$OPENPRC)] <- OPEN_AVG

df_Netflix_2016
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Netflix_2016$text <- lemmatize_words(clean_tweets(df_Netflix_2016$text))
```

```{r}
# Disney, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Disney_2018 <- df_Disney_2018[, -c(6, 8:10, 15, 17:21)]
df_Disney_2018

# Fill missing TICKER entries with "DIS".
df_Disney_2018$TICKER[is.na(df_Disney_2018$TICKER)] <- "DIS"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Disney_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Disney_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Disney_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Disney_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Disney_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Disney_2018$PRC[is.na(df_Disney_2018$PRC)] <- PRC_AVG
df_Disney_2018$VOL[is.na(df_Disney_2018$VOL)] <- VOL_AVG
df_Disney_2018$BID[is.na(df_Disney_2018$BID)] <- BID_AVG
df_Disney_2018$ASK[is.na(df_Disney_2018$ASK)] <- ASK_AVG
df_Disney_2018$OPENPRC[is.na(df_Disney_2018$OPENPRC)] <- OPEN_AVG

df_Disney_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Disney_2018$text <- lemmatize_words(clean_tweets(df_Disney_2018$text))
```
```{r}
# Fox, 2015
# Drop columns by position. Run once: a second run would drop more columns.
df_Fox_2015 <- df_Fox_2015[, -c(6, 8:10, 15, 17:21)]
df_Fox_2015

# Fill missing TICKER entries with "FOX".
df_Fox_2015$TICKER[is.na(df_Fox_2015$TICKER)] <- "FOX"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Fox_2015$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Fox_2015$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Fox_2015$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Fox_2015$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Fox_2015$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Fox_2015$PRC[is.na(df_Fox_2015$PRC)] <- PRC_AVG
df_Fox_2015$VOL[is.na(df_Fox_2015$VOL)] <- VOL_AVG
df_Fox_2015$BID[is.na(df_Fox_2015$BID)] <- BID_AVG
df_Fox_2015$ASK[is.na(df_Fox_2015$ASK)] <- ASK_AVG
df_Fox_2015$OPENPRC[is.na(df_Fox_2015$OPENPRC)] <- OPEN_AVG

df_Fox_2015
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Fox_2015$text <- lemmatize_words(clean_tweets(df_Fox_2015$text))
```

```{r}
# Microsoft, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Microsoft_2016 <- df_Microsoft_2016[, -c(6, 8:10, 15, 17:21)]
df_Microsoft_2016

# Fill missing TICKER entries with "MSFT".
df_Microsoft_2016$TICKER[is.na(df_Microsoft_2016$TICKER)] <- "MSFT"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Microsoft_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Microsoft_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Microsoft_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Microsoft_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Microsoft_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Microsoft_2016$PRC[is.na(df_Microsoft_2016$PRC)] <- PRC_AVG
df_Microsoft_2016$VOL[is.na(df_Microsoft_2016$VOL)] <- VOL_AVG
df_Microsoft_2016$BID[is.na(df_Microsoft_2016$BID)] <- BID_AVG
df_Microsoft_2016$ASK[is.na(df_Microsoft_2016$ASK)] <- ASK_AVG
df_Microsoft_2016$OPENPRC[is.na(df_Microsoft_2016$OPENPRC)] <- OPEN_AVG

df_Microsoft_2016
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Microsoft_2016$text <- lemmatize_words(clean_tweets(df_Microsoft_2016$text))
```
```{r}
# Microsoft, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Microsoft_2018 <- df_Microsoft_2018[, -c(6, 8:10, 15, 17:21)]
df_Microsoft_2018

# Fill missing TICKER entries with "MSFT".
df_Microsoft_2018$TICKER[is.na(df_Microsoft_2018$TICKER)] <- "MSFT"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Microsoft_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Microsoft_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Microsoft_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Microsoft_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Microsoft_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Microsoft_2018$PRC[is.na(df_Microsoft_2018$PRC)] <- PRC_AVG
df_Microsoft_2018$VOL[is.na(df_Microsoft_2018$VOL)] <- VOL_AVG
df_Microsoft_2018$BID[is.na(df_Microsoft_2018$BID)] <- BID_AVG
df_Microsoft_2018$ASK[is.na(df_Microsoft_2018$ASK)] <- ASK_AVG
df_Microsoft_2018$OPENPRC[is.na(df_Microsoft_2018$OPENPRC)] <- OPEN_AVG

df_Microsoft_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Microsoft_2018$text <- lemmatize_words(clean_tweets(df_Microsoft_2018$text))
```
```{r}
# Juniper_Shaygan, 2014
# Drop columns by position. Run once: a second run would drop more columns.
df_Juniper_S_2014 <- df_Juniper_S_2014[, -c(6, 8:10, 15, 17:21)]
df_Juniper_S_2014

# Fill missing TICKER entries with "JNPR".
df_Juniper_S_2014$TICKER[is.na(df_Juniper_S_2014$TICKER)] <- "JNPR"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Juniper_S_2014$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Juniper_S_2014$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Juniper_S_2014$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Juniper_S_2014$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Juniper_S_2014$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Juniper_S_2014$PRC[is.na(df_Juniper_S_2014$PRC)] <- PRC_AVG
df_Juniper_S_2014$VOL[is.na(df_Juniper_S_2014$VOL)] <- VOL_AVG
df_Juniper_S_2014$BID[is.na(df_Juniper_S_2014$BID)] <- BID_AVG
df_Juniper_S_2014$ASK[is.na(df_Juniper_S_2014$ASK)] <- ASK_AVG
df_Juniper_S_2014$OPENPRC[is.na(df_Juniper_S_2014$OPENPRC)] <- OPEN_AVG

df_Juniper_S_2014
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Juniper_S_2014$text <- lemmatize_words(clean_tweets(df_Juniper_S_2014$text))
```
```{r}
# Juniper_Shaygan, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Juniper_S_2016 <- df_Juniper_S_2016[, -c(6, 8:10, 15, 17:21)]
df_Juniper_S_2016

# Fill missing TICKER entries with "JNPR".
df_Juniper_S_2016$TICKER[is.na(df_Juniper_S_2016$TICKER)] <- "JNPR"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Juniper_S_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Juniper_S_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Juniper_S_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Juniper_S_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Juniper_S_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Juniper_S_2016$PRC[is.na(df_Juniper_S_2016$PRC)] <- PRC_AVG
df_Juniper_S_2016$VOL[is.na(df_Juniper_S_2016$VOL)] <- VOL_AVG
df_Juniper_S_2016$BID[is.na(df_Juniper_S_2016$BID)] <- BID_AVG
df_Juniper_S_2016$ASK[is.na(df_Juniper_S_2016$ASK)] <- ASK_AVG
df_Juniper_S_2016$OPENPRC[is.na(df_Juniper_S_2016$OPENPRC)] <- OPEN_AVG

df_Juniper_S_2016
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Juniper_S_2016$text <- lemmatize_words(clean_tweets(df_Juniper_S_2016$text))
```

```{r}
# Synchrony Financial, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_SYFM_2016 <- df_SYFM_2016[, -c(6, 8:10, 15, 17:21)]
df_SYFM_2016

# Fill missing TICKER entries with "SYF".
df_SYFM_2016$TICKER[is.na(df_SYFM_2016$TICKER)] <- "SYF"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_SYFM_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_SYFM_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_SYFM_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_SYFM_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_SYFM_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_SYFM_2016$PRC[is.na(df_SYFM_2016$PRC)] <- PRC_AVG
df_SYFM_2016$VOL[is.na(df_SYFM_2016$VOL)] <- VOL_AVG
df_SYFM_2016$BID[is.na(df_SYFM_2016$BID)] <- BID_AVG
df_SYFM_2016$ASK[is.na(df_SYFM_2016$ASK)] <- ASK_AVG
df_SYFM_2016$OPENPRC[is.na(df_SYFM_2016$OPENPRC)] <- OPEN_AVG

df_SYFM_2016
```


```{r}

# Clean the tweet text, then lemmatize the cleaned result.
df_SYFM_2016$text <- lemmatize_words(clean_tweets(df_SYFM_2016$text))
```
```{r}
# Synchrony Financial, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_SYFM_2018 <- df_SYFM_2018[, -c(6, 8:10, 15, 17:21)]
df_SYFM_2018

# Fill missing TICKER entries with "SYF".
df_SYFM_2018$TICKER[is.na(df_SYFM_2018$TICKER)] <- "SYF"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_SYFM_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_SYFM_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_SYFM_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_SYFM_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_SYFM_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_SYFM_2018$PRC[is.na(df_SYFM_2018$PRC)] <- PRC_AVG
df_SYFM_2018$VOL[is.na(df_SYFM_2018$VOL)] <- VOL_AVG
df_SYFM_2018$BID[is.na(df_SYFM_2018$BID)] <- BID_AVG
df_SYFM_2018$ASK[is.na(df_SYFM_2018$ASK)] <- ASK_AVG
df_SYFM_2018$OPENPRC[is.na(df_SYFM_2018$OPENPRC)] <- OPEN_AVG

df_SYFM_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_SYFM_2018$text <- lemmatize_words(clean_tweets(df_SYFM_2018$text))
```
```{r}
# Southern company, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Southern_2016 <- df_Southern_2016[, -c(6, 8:10, 15, 17:21)]
df_Southern_2016

# Fill missing TICKER entries with "SO".
df_Southern_2016$TICKER[is.na(df_Southern_2016$TICKER)] <- "SO"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Southern_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Southern_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Southern_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Southern_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Southern_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Southern_2016$PRC[is.na(df_Southern_2016$PRC)] <- PRC_AVG
df_Southern_2016$VOL[is.na(df_Southern_2016$VOL)] <- VOL_AVG
df_Southern_2016$BID[is.na(df_Southern_2016$BID)] <- BID_AVG
df_Southern_2016$ASK[is.na(df_Southern_2016$ASK)] <- ASK_AVG
df_Southern_2016$OPENPRC[is.na(df_Southern_2016$OPENPRC)] <- OPEN_AVG

df_Southern_2016
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Southern_2016$text <- lemmatize_words(clean_tweets(df_Southern_2016$text))
```
```{r}
# Southern company, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Southern_2018 <- df_Southern_2018[, -c(6, 8:10, 15, 17:21)]
df_Southern_2018

# Fill missing TICKER entries with "SO".
df_Southern_2018$TICKER[is.na(df_Southern_2018$TICKER)] <- "SO"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Southern_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Southern_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Southern_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Southern_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Southern_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Southern_2018$PRC[is.na(df_Southern_2018$PRC)] <- PRC_AVG
df_Southern_2018$VOL[is.na(df_Southern_2018$VOL)] <- VOL_AVG
df_Southern_2018$BID[is.na(df_Southern_2018$BID)] <- BID_AVG
df_Southern_2018$ASK[is.na(df_Southern_2018$ASK)] <- ASK_AVG
df_Southern_2018$OPENPRC[is.na(df_Southern_2018$OPENPRC)] <- OPEN_AVG

df_Southern_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Southern_2018$text <- lemmatize_words(clean_tweets(df_Southern_2018$text))
```
```{r}
# Apple, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Apple_2016 <- df_Apple_2016[, -c(6, 8:10, 15, 17:21)]
df_Apple_2016

# Fill missing TICKER entries with "AAPL".
df_Apple_2016$TICKER[is.na(df_Apple_2016$TICKER)] <- "AAPL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Apple_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Apple_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Apple_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Apple_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Apple_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Apple_2016$PRC[is.na(df_Apple_2016$PRC)] <- PRC_AVG
df_Apple_2016$VOL[is.na(df_Apple_2016$VOL)] <- VOL_AVG
df_Apple_2016$BID[is.na(df_Apple_2016$BID)] <- BID_AVG
df_Apple_2016$ASK[is.na(df_Apple_2016$ASK)] <- ASK_AVG
df_Apple_2016$OPENPRC[is.na(df_Apple_2016$OPENPRC)] <- OPEN_AVG

df_Apple_2016
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Apple_2016$text <- lemmatize_words(clean_tweets(df_Apple_2016$text))
```


```{r}
```

```{r}
# Apple, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Apple_2018 <- df_Apple_2018[, -c(6, 8:10, 15, 17:21)]
df_Apple_2018

# Fill missing TICKER entries with "AAPL".
df_Apple_2018$TICKER[is.na(df_Apple_2018$TICKER)] <- "AAPL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Apple_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Apple_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Apple_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Apple_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Apple_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Apple_2018$PRC[is.na(df_Apple_2018$PRC)] <- PRC_AVG
df_Apple_2018$VOL[is.na(df_Apple_2018$VOL)] <- VOL_AVG
df_Apple_2018$BID[is.na(df_Apple_2018$BID)] <- BID_AVG
df_Apple_2018$ASK[is.na(df_Apple_2018$ASK)] <- ASK_AVG
df_Apple_2018$OPENPRC[is.na(df_Apple_2018$OPENPRC)] <- OPEN_AVG

df_Apple_2018
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Apple_2018$text <- lemmatize_words(clean_tweets(df_Apple_2018$text))
```

```{r}
# XL, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_XL_2016 <- df_XL_2016[, -c(6, 8:10, 15, 17:21)]
df_XL_2016

# Fill missing TICKER entries with "XL".
df_XL_2016$TICKER[is.na(df_XL_2016$TICKER)] <- "XL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_XL_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_XL_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_XL_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_XL_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_XL_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_XL_2016$PRC[is.na(df_XL_2016$PRC)] <- PRC_AVG
df_XL_2016$VOL[is.na(df_XL_2016$VOL)] <- VOL_AVG
df_XL_2016$BID[is.na(df_XL_2016$BID)] <- BID_AVG
df_XL_2016$ASK[is.na(df_XL_2016$ASK)] <- ASK_AVG
df_XL_2016$OPENPRC[is.na(df_XL_2016$OPENPRC)] <- OPEN_AVG

df_XL_2016
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_XL_2016$text <- lemmatize_words(clean_tweets(df_XL_2016$text))
```
```{r}
# XL, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_XL_2018 <- df_XL_2018[, -c(6, 8:10, 15, 17:21)]
df_XL_2018

# Fill missing TICKER entries with "XL".
df_XL_2018$TICKER[is.na(df_XL_2018$TICKER)] <- "XL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_XL_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_XL_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_XL_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_XL_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_XL_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_XL_2018$PRC[is.na(df_XL_2018$PRC)] <- PRC_AVG
df_XL_2018$VOL[is.na(df_XL_2018$VOL)] <- VOL_AVG
df_XL_2018$BID[is.na(df_XL_2018$BID)] <- BID_AVG
df_XL_2018$ASK[is.na(df_XL_2018$ASK)] <- ASK_AVG
df_XL_2018$OPENPRC[is.na(df_XL_2018$OPENPRC)] <- OPEN_AVG

df_XL_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_XL_2018$text <- lemmatize_words(clean_tweets(df_XL_2018$text))
```
```{r}
# Tyson foods, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Tysonf_2016 <- df_Tysonf_2016[, -c(6, 8:10, 15, 17:21)]
df_Tysonf_2016

# Fill missing TICKER entries with "TSN".
df_Tysonf_2016$TICKER[is.na(df_Tysonf_2016$TICKER)] <- "TSN"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Tysonf_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Tysonf_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Tysonf_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Tysonf_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Tysonf_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Tysonf_2016$PRC[is.na(df_Tysonf_2016$PRC)] <- PRC_AVG
df_Tysonf_2016$VOL[is.na(df_Tysonf_2016$VOL)] <- VOL_AVG
df_Tysonf_2016$BID[is.na(df_Tysonf_2016$BID)] <- BID_AVG
df_Tysonf_2016$ASK[is.na(df_Tysonf_2016$ASK)] <- ASK_AVG
df_Tysonf_2016$OPENPRC[is.na(df_Tysonf_2016$OPENPRC)] <- OPEN_AVG

df_Tysonf_2016
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Tysonf_2016$text <- lemmatize_words(clean_tweets(df_Tysonf_2016$text))
```
```{r}
# Tyson foods, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Tysonf_2018 <- df_Tysonf_2018[, -c(6, 8:10, 15, 17:21)]
df_Tysonf_2018

# Fill missing TICKER entries with "TSN".
df_Tysonf_2018$TICKER[is.na(df_Tysonf_2018$TICKER)] <- "TSN"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Tysonf_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Tysonf_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Tysonf_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Tysonf_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Tysonf_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Tysonf_2018$PRC[is.na(df_Tysonf_2018$PRC)] <- PRC_AVG
df_Tysonf_2018$VOL[is.na(df_Tysonf_2018$VOL)] <- VOL_AVG
df_Tysonf_2018$BID[is.na(df_Tysonf_2018$BID)] <- BID_AVG
df_Tysonf_2018$ASK[is.na(df_Tysonf_2018$ASK)] <- ASK_AVG
df_Tysonf_2018$OPENPRC[is.na(df_Tysonf_2018$OPENPRC)] <- OPEN_AVG

df_Tysonf_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Tysonf_2018$text <- lemmatize_words(clean_tweets(df_Tysonf_2018$text))
```


```{r}
```
```{r}
# Akamai, 2016
# Drop columns by position. Run once: a second run would drop more columns.
df_Akamai_2016 <- df_Akamai_2016[, -c(6, 8:10, 15, 17:21)]
df_Akamai_2016

# Fill missing TICKER entries with "AKAM".
df_Akamai_2016$TICKER[is.na(df_Akamai_2016$TICKER)] <- "AKAM"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Akamai_2016$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Akamai_2016$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Akamai_2016$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Akamai_2016$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Akamai_2016$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Akamai_2016$PRC[is.na(df_Akamai_2016$PRC)] <- PRC_AVG
df_Akamai_2016$VOL[is.na(df_Akamai_2016$VOL)] <- VOL_AVG
df_Akamai_2016$BID[is.na(df_Akamai_2016$BID)] <- BID_AVG
df_Akamai_2016$ASK[is.na(df_Akamai_2016$ASK)] <- ASK_AVG
df_Akamai_2016$OPENPRC[is.na(df_Akamai_2016$OPENPRC)] <- OPEN_AVG

df_Akamai_2016

```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Akamai_2016$text <- lemmatize_words(clean_tweets(df_Akamai_2016$text))
```

```{r}
# Akamai, 2018
# Drop columns by position. Run once: a second run would drop more columns.
df_Akamai_2018 <- df_Akamai_2018[, -c(6, 8:10, 15, 17:21)]
df_Akamai_2018

# Fill missing TICKER entries with "AKAM".
df_Akamai_2018$TICKER[is.na(df_Akamai_2018$TICKER)] <- "AKAM"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_Akamai_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_Akamai_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_Akamai_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_Akamai_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_Akamai_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_Akamai_2018$PRC[is.na(df_Akamai_2018$PRC)] <- PRC_AVG
df_Akamai_2018$VOL[is.na(df_Akamai_2018$VOL)] <- VOL_AVG
df_Akamai_2018$BID[is.na(df_Akamai_2018$BID)] <- BID_AVG
df_Akamai_2018$ASK[is.na(df_Akamai_2018$ASK)] <- ASK_AVG
df_Akamai_2018$OPENPRC[is.na(df_Akamai_2018$OPENPRC)] <- OPEN_AVG

df_Akamai_2018
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_Akamai_2018$text <- lemmatize_words(clean_tweets(df_Akamai_2018$text))
```
```{r}
# Google, 2015
# Keep every row of the first table; attach matches from the second by date.
# NOTE(review): the 2015 input is merged with GOOGL_SP_2016 -- confirm the year.
df_GOOGL_2015 <- merge(
  x = GOOGL_2015, y = GOOGL_SP_2016, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_GOOGL_2015, file = "Google_2015", row.names = FALSE)

# Drop unneeded columns by position.
df_GOOGL_2015 <- df_GOOGL_2015[, -c(6, 8:10, 15, 17:21)]
df_GOOGL_2015

# Fill missing TICKER entries with "GOOGL".
df_GOOGL_2015$TICKER[is.na(df_GOOGL_2015$TICKER)] <- "GOOGL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_GOOGL_2015$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_GOOGL_2015$VOL, na.rm = TRUE)
BID_AVG <- mean(df_GOOGL_2015$BID, na.rm = TRUE)
ASK_AVG <- mean(df_GOOGL_2015$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_GOOGL_2015$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_GOOGL_2015$PRC[is.na(df_GOOGL_2015$PRC)] <- PRC_AVG
df_GOOGL_2015$VOL[is.na(df_GOOGL_2015$VOL)] <- VOL_AVG
df_GOOGL_2015$BID[is.na(df_GOOGL_2015$BID)] <- BID_AVG
df_GOOGL_2015$ASK[is.na(df_GOOGL_2015$ASK)] <- ASK_AVG
df_GOOGL_2015$OPENPRC[is.na(df_GOOGL_2015$OPENPRC)] <- OPEN_AVG

df_GOOGL_2015
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_GOOGL_2015$text <- lemmatize_words(clean_tweets(df_GOOGL_2015$text))
```
```{r}
# Google, 2018
# Keep every row of the first table; attach matches from the second by date.
df_GOOGL_2018 <- merge(
  x = GOOGL_2018, y = GOOGLE_SP_2018, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_GOOGL_2018, file = "Google_2018", row.names = FALSE)

# Drop unneeded columns by position.
df_GOOGL_2018 <- df_GOOGL_2018[, -c(6, 8:10, 15, 17:21)]
df_GOOGL_2018

# Fill missing TICKER entries with "GOOGL".
df_GOOGL_2018$TICKER[is.na(df_GOOGL_2018$TICKER)] <- "GOOGL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_GOOGL_2018$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_GOOGL_2018$VOL, na.rm = TRUE)
BID_AVG <- mean(df_GOOGL_2018$BID, na.rm = TRUE)
ASK_AVG <- mean(df_GOOGL_2018$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_GOOGL_2018$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_GOOGL_2018$PRC[is.na(df_GOOGL_2018$PRC)] <- PRC_AVG
df_GOOGL_2018$VOL[is.na(df_GOOGL_2018$VOL)] <- VOL_AVG
df_GOOGL_2018$BID[is.na(df_GOOGL_2018$BID)] <- BID_AVG
df_GOOGL_2018$ASK[is.na(df_GOOGL_2018$ASK)] <- ASK_AVG
df_GOOGL_2018$OPENPRC[is.na(df_GOOGL_2018$OPENPRC)] <- OPEN_AVG

df_GOOGL_2018
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_GOOGL_2018$text <- lemmatize_words(clean_tweets(df_GOOGL_2018$text))
```


## Covid datasets
```{r}
# WU, covid period
# Keep every row of the first table; attach matches from the second by date.
df_WU_covid <- merge(x = WU_covid, y = WU_SP_covid, by = "date", all.x = TRUE)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_WU_covid, file = "WU_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_WU_covid <- df_WU_covid[, -c(6, 8:10, 15, 17:21)]
df_WU_covid

# Fill missing TICKER entries with "WU".
df_WU_covid$TICKER[is.na(df_WU_covid$TICKER)] <- "WU"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_WU_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_WU_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_WU_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_WU_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_WU_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_WU_covid$PRC[is.na(df_WU_covid$PRC)] <- PRC_AVG
df_WU_covid$VOL[is.na(df_WU_covid$VOL)] <- VOL_AVG
df_WU_covid$BID[is.na(df_WU_covid$BID)] <- BID_AVG
df_WU_covid$ASK[is.na(df_WU_covid$ASK)] <- ASK_AVG
df_WU_covid$OPENPRC[is.na(df_WU_covid$OPENPRC)] <- OPEN_AVG

df_WU_covid
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_WU_covid$text <- lemmatize_words(clean_tweets(df_WU_covid$text))
```
```{r}
# AAPL, covid period
# Keep every row of the first table; attach matches from the second by date.
df_AAPL_covid <- merge(
  x = AAPL_covid, y = AAPL_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_AAPL_covid, file = "AAPL_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_AAPL_covid <- df_AAPL_covid[, -c(6, 8:10, 15, 17:21)]
df_AAPL_covid

# Fill missing TICKER entries with "AAPL".
df_AAPL_covid$TICKER[is.na(df_AAPL_covid$TICKER)] <- "AAPL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_AAPL_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_AAPL_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_AAPL_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_AAPL_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_AAPL_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_AAPL_covid$PRC[is.na(df_AAPL_covid$PRC)] <- PRC_AVG
df_AAPL_covid$VOL[is.na(df_AAPL_covid$VOL)] <- VOL_AVG
df_AAPL_covid$BID[is.na(df_AAPL_covid$BID)] <- BID_AVG
df_AAPL_covid$ASK[is.na(df_AAPL_covid$ASK)] <- ASK_AVG
df_AAPL_covid$OPENPRC[is.na(df_AAPL_covid$OPENPRC)] <- OPEN_AVG

df_AAPL_covid
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_AAPL_covid$text <- lemmatize_words(clean_tweets(df_AAPL_covid$text))
```

```{r}
# RMD, covid period
# Keep every row of the first table; attach matches from the second by date.
df_RMD_covid <- merge(
  x = RMD_covid, y = RMD_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_RMD_covid, file = "RMD_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_RMD_covid <- df_RMD_covid[, -c(6, 8:10, 15, 17:21)]
df_RMD_covid

# Fill missing TICKER entries with "RMD".
df_RMD_covid$TICKER[is.na(df_RMD_covid$TICKER)] <- "RMD"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_RMD_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_RMD_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_RMD_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_RMD_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_RMD_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_RMD_covid$PRC[is.na(df_RMD_covid$PRC)] <- PRC_AVG
df_RMD_covid$VOL[is.na(df_RMD_covid$VOL)] <- VOL_AVG
df_RMD_covid$BID[is.na(df_RMD_covid$BID)] <- BID_AVG
df_RMD_covid$ASK[is.na(df_RMD_covid$ASK)] <- ASK_AVG
df_RMD_covid$OPENPRC[is.na(df_RMD_covid$OPENPRC)] <- OPEN_AVG

df_RMD_covid
```
```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_RMD_covid$text <- lemmatize_words(clean_tweets(df_RMD_covid$text))
```

```{r}
# GOOGL, covid period
# Keep every row of the first table; attach matches from the second by date.
df_GOOGL_covid <- merge(
  x = GOOGL_covid, y = GOOGL_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_GOOGL_covid, file = "GOOGL_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_GOOGL_covid <- df_GOOGL_covid[, -c(6, 8:10, 15, 17:21)]
df_GOOGL_covid

# Fill missing TICKER entries with "GOOGL".
df_GOOGL_covid$TICKER[is.na(df_GOOGL_covid$TICKER)] <- "GOOGL"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_GOOGL_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_GOOGL_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_GOOGL_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_GOOGL_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_GOOGL_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_GOOGL_covid$PRC[is.na(df_GOOGL_covid$PRC)] <- PRC_AVG
df_GOOGL_covid$VOL[is.na(df_GOOGL_covid$VOL)] <- VOL_AVG
df_GOOGL_covid$BID[is.na(df_GOOGL_covid$BID)] <- BID_AVG
df_GOOGL_covid$ASK[is.na(df_GOOGL_covid$ASK)] <- ASK_AVG
df_GOOGL_covid$OPENPRC[is.na(df_GOOGL_covid$OPENPRC)] <- OPEN_AVG

df_GOOGL_covid
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_GOOGL_covid$text <- lemmatize_words(clean_tweets(df_GOOGL_covid$text))
```
```{r}
# AGN, covid period
# Keep every row of the first table; attach matches from the second by date.
df_AGN_covid <- merge(
  x = AGN_covid, y = AGN_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_AGN_covid, file = "AGN_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_AGN_covid <- df_AGN_covid[, -c(6, 8:10, 15, 17:21)]
df_AGN_covid

# Fill missing TICKER entries with "AGN".
df_AGN_covid$TICKER[is.na(df_AGN_covid$TICKER)] <- "AGN"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_AGN_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_AGN_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_AGN_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_AGN_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_AGN_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_AGN_covid$PRC[is.na(df_AGN_covid$PRC)] <- PRC_AVG
df_AGN_covid$VOL[is.na(df_AGN_covid$VOL)] <- VOL_AVG
df_AGN_covid$BID[is.na(df_AGN_covid$BID)] <- BID_AVG
df_AGN_covid$ASK[is.na(df_AGN_covid$ASK)] <- ASK_AVG
df_AGN_covid$OPENPRC[is.na(df_AGN_covid$OPENPRC)] <- OPEN_AVG

df_AGN_covid
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_AGN_covid$text <- lemmatize_words(clean_tweets(df_AGN_covid$text))
```
```{r}
# AMAZON, covid period
# Keep every row of the first table; attach matches from the second by date.
df_AMZN_covid <- merge(
  x = AMZN_covid, y = AMZN_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_AMZN_covid, file = "AMZN_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_AMZN_covid <- df_AMZN_covid[, -c(6, 8:10, 15, 17:21)]
df_AMZN_covid

# Fill missing TICKER entries with "AMZN".
df_AMZN_covid$TICKER[is.na(df_AMZN_covid$TICKER)] <- "AMZN"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_AMZN_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_AMZN_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_AMZN_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_AMZN_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_AMZN_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_AMZN_covid$PRC[is.na(df_AMZN_covid$PRC)] <- PRC_AVG
df_AMZN_covid$VOL[is.na(df_AMZN_covid$VOL)] <- VOL_AVG
df_AMZN_covid$BID[is.na(df_AMZN_covid$BID)] <- BID_AVG
df_AMZN_covid$ASK[is.na(df_AMZN_covid$ASK)] <- ASK_AVG
df_AMZN_covid$OPENPRC[is.na(df_AMZN_covid$OPENPRC)] <- OPEN_AVG

df_AMZN_covid
```


```{r}

# Clean the tweet text, then lemmatize the cleaned result.
df_AMZN_covid$text <- lemmatize_words(clean_tweets(df_AMZN_covid$text))
```
```{r}
# Disney, covid period
# Keep every row of the first table; attach matches from the second by date.
df_DIS_covid <- merge(
  x = DIS_covid, y = DIS_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_DIS_covid, file = "DIS_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_DIS_covid <- df_DIS_covid[, -c(6, 8:10, 15, 17:21)]
df_DIS_covid

# Fill missing TICKER entries with "DIS".
df_DIS_covid$TICKER[is.na(df_DIS_covid$TICKER)] <- "DIS"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_DIS_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_DIS_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_DIS_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_DIS_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_DIS_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_DIS_covid$PRC[is.na(df_DIS_covid$PRC)] <- PRC_AVG
df_DIS_covid$VOL[is.na(df_DIS_covid$VOL)] <- VOL_AVG
df_DIS_covid$BID[is.na(df_DIS_covid$BID)] <- BID_AVG
df_DIS_covid$ASK[is.na(df_DIS_covid$ASK)] <- ASK_AVG
df_DIS_covid$OPENPRC[is.na(df_DIS_covid$OPENPRC)] <- OPEN_AVG

df_DIS_covid
```


```{r}

# Clean the tweet text, then lemmatize the cleaned result.
df_DIS_covid$text <- lemmatize_words(clean_tweets(df_DIS_covid$text))
```
```{r}
# Microsoft, covid period
# Keep every row of the first table; attach matches from the second by date.
df_MSFT_covid <- merge(
  x = MSFT_covid, y = MSFT_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_MSFT_covid, file = "MSFT_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_MSFT_covid <- df_MSFT_covid[, -c(6, 8:10, 15, 17:21)]
df_MSFT_covid

# Fill missing TICKER entries with "MSFT".
df_MSFT_covid$TICKER[is.na(df_MSFT_covid$TICKER)] <- "MSFT"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_MSFT_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_MSFT_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_MSFT_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_MSFT_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_MSFT_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_MSFT_covid$PRC[is.na(df_MSFT_covid$PRC)] <- PRC_AVG
df_MSFT_covid$VOL[is.na(df_MSFT_covid$VOL)] <- VOL_AVG
df_MSFT_covid$BID[is.na(df_MSFT_covid$BID)] <- BID_AVG
df_MSFT_covid$ASK[is.na(df_MSFT_covid$ASK)] <- ASK_AVG
df_MSFT_covid$OPENPRC[is.na(df_MSFT_covid$OPENPRC)] <- OPEN_AVG

df_MSFT_covid
```


```{r}
# Clean the tweet text, then lemmatize the cleaned result.
df_MSFT_covid$text <- lemmatize_words(clean_tweets(df_MSFT_covid$text))
```
```{r}
# Medtronic, covid period
# Keep every row of the first table; attach matches from the second by date.
# NOTE(review): `MDT_Covid` has a capital C, unlike the other `*_covid`
# inputs; R names are case-sensitive -- confirm this object exists.
df_MDT_covid <- merge(
  x = MDT_Covid, y = MDT_SP_covid, by = "date", all.x = TRUE
)
# Save the merged table before any columns are dropped or imputed.
write.csv(df_MDT_covid, file = "MDT_COVID", row.names = FALSE)

# Drop unneeded columns by position.
df_MDT_covid <- df_MDT_covid[, -c(6, 8:10, 15, 17:21)]
df_MDT_covid

# Fill missing TICKER entries with "MDT".
df_MDT_covid$TICKER[is.na(df_MDT_covid$TICKER)] <- "MDT"

# Means of the observed (non-NA) values of the five imputed columns.
PRC_AVG <- mean(df_MDT_covid$PRC, na.rm = TRUE)
VOL_AVG <- mean(df_MDT_covid$VOL, na.rm = TRUE)
BID_AVG <- mean(df_MDT_covid$BID, na.rm = TRUE)
ASK_AVG <- mean(df_MDT_covid$ASK, na.rm = TRUE)
OPEN_AVG <- mean(df_MDT_covid$OPENPRC, na.rm = TRUE)

# Replace the NAs in each column with that column's mean.
df_MDT_covid$PRC[is.na(df_MDT_covid$PRC)] <- PRC_AVG
df_MDT_covid$VOL[is.na(df_MDT_covid$VOL)] <- VOL_AVG
df_MDT_covid$BID[is.na(df_MDT_covid$BID)] <- BID_AVG
df_MDT_covid$ASK[is.na(df_MDT_covid$ASK)] <- ASK_AVG
df_MDT_covid$OPENPRC[is.na(df_MDT_covid$OPENPRC)] <- OPEN_AVG

df_MDT_covid
```


```{r}
# Run the tweet text through clean_tweets() and then lemmatize_words(),
# overwriting the text column with the result
df_MDT_covid$text <- lemmatize_words(clean_tweets(df_MDT_covid$text))
```
```{r}
# RedHat
# Join RHT_SP_covid onto RHT_covid by date; all.x = TRUE keeps every row of
# RHT_covid, so dates with no match in RHT_SP_covid get NA in its columns.
df_RHT_covid <- merge(RHT_covid, RHT_SP_covid, by = "date", all.x = TRUE)
# The file is written before the column drop and NA fill below, so it holds
# the raw merge result.
write.csv(df_RHT_covid, file = "RHT_COVID", row.names = FALSE)

# Drop columns by position.
# NOTE(review): these indices depend on the merged column order; confirm they
# still point at the intended columns if either input changes.
df_RHT_covid <- df_RHT_covid[, -c(6, 8, 9, 10, 15, 17, 18, 19, 20, 21)]
df_RHT_covid

# Ticker: missing values become the literal symbol
df_RHT_covid$TICKER[is.na(df_RHT_covid$TICKER)] <- "RHT"

# For each numeric column below, missing values are replaced with the mean of
# the non-missing values in that column.

# PRC
PRC_AVG <- mean(df_RHT_covid$PRC, na.rm = TRUE)
df_RHT_covid$PRC[is.na(df_RHT_covid$PRC)] <- PRC_AVG

# VOL
VOL_AVG <- mean(df_RHT_covid$VOL, na.rm = TRUE)
df_RHT_covid$VOL[is.na(df_RHT_covid$VOL)] <- VOL_AVG

# BID
BID_AVG <- mean(df_RHT_covid$BID, na.rm = TRUE)
df_RHT_covid$BID[is.na(df_RHT_covid$BID)] <- BID_AVG

# ASK
ASK_AVG <- mean(df_RHT_covid$ASK, na.rm = TRUE)
df_RHT_covid$ASK[is.na(df_RHT_covid$ASK)] <- ASK_AVG

# OPENPRC
OPEN_AVG <- mean(df_RHT_covid$OPENPRC, na.rm = TRUE)
df_RHT_covid$OPENPRC[is.na(df_RHT_covid$OPENPRC)] <- OPEN_AVG

df_RHT_covid
```


```{r}
# Run the tweet text through clean_tweets() and then lemmatize_words(),
# overwriting the text column with the result
df_RHT_covid$text <- lemmatize_words(clean_tweets(df_RHT_covid$text))
```

## Wordclouds

```{r}
#Packages 
library(wordcloud)
library(RColorBrewer)
library(wordcloud2)
library(tm)
library("tidyverse")
```
```{r}
# Word cloud: df_Test
text <- df_Test$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Johnson 2016
text <- df_Johnson_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: Johnson 2017
text <- df_Johnson_2017$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Nasdaq
text <- df_NASDAQ$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: Autodesk
text <- df_autodesk$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Word cloud: Hasbro 2016
text <- df_Hasbro_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Hasbro 2018
text <- df_Hasbro_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Intel 2016
text <- df_Intel_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Intel 2018
text <- df_Intel_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Activision 2016
text <- df_Activision_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Activision 2018
text <- df_Activision_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Inuit 2016
text <- df_Inuit_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Inuit 2018
text <- df_Inuit_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Allergan 2016
text <- df_Allergan_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Allergan 2018
text <- df_Allergan_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Humana 2016
text <- df_Humana_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Humana 2018
text <- df_Humana_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Autodesk CB 2016
text <- df_CB_autodesk_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Autodesk CB 2018
text <- df_CB_autodesk_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Equinox 2018
text <- df_equinox_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Cisco 2016
text <- df_cisco_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Cisco 2018
text <- df_cisco_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: EBAY 2016
text <- df_EBAY_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: EBAY 2018
text <- df_EBAY_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Word cloud: Davita 2016
text <- df_Davita_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Davita 2017
# NOTE(review): this chunk was labelled "Davita 2018" but reads
# df_Davita_2017; confirm which year is intended.
text <- df_Davita_2017$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Illumina 2016
text <- df_Illumina_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Illumina 2018
text <- df_Illumina_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Homedepot 2013
text <- df_Homedepot_2013$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Southwest Airlines 2016
text <- df_Southwest_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Southwest Airlines 2018
text <- df_Southwest_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: FIS 2016
text <- df_FIS_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: FIS 2018
text <- df_FIS_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Leucadia National 2016
text <- df_Leucadia_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Leucadia National 2018
text <- df_Leucadia_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: Verizon 2018
text <- df_Verizon_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Western Union 2016
text <- df_WU_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Western Union 2018
text <- df_WU_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: RedHat 2016
text <- df_RedHat_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: RedHat 2018
text <- df_RedHat_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: Amazon 2016
text <- df_AMZN_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: Amazon 2018
text <- df_AMZN_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Word cloud: GE 2016
text <- df_GE_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Word cloud: GE 2017
text <- df_GE_2017$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Word cloud: Fiserv 2016
text <- df_Fiserv_2016$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Fiserv 2018
text <- df_Fiserv_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Word cloud: Waste Management 2018
text <- df_WM_2018$text
docs <- Corpus(VectorSource(text))

# Clean in order: digits, punctuation, extra whitespace, case, English stop words
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- docs %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Term totals over all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Wills Tower 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_Wills_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Wills Tower 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_Wills_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Tripadvisor 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_tripadvisor_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Tripadvisor 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_tripadvisor_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Davita Kent 2016 (year per the df_DavitaK_2016 data frame used below) --
# word cloud of the most frequent terms in the tweet text
text <- df_DavitaK_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Davita Kent 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_DavitaK_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Starbucks 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_Starbucks_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# McCormick 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_McCormick_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# IHS Markit 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_IHS_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# AMD 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_AMD_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# ResMed 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_ResMed_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# ResMed 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_ResMed_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# CA 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_CA_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# CA 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_CA_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# GM 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_GM_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# GM 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_GM_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Aetna 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_Aetna_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Aetna 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_Aetna_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# NRG 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_NRG_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# NRG 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_NRG_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}
# Medtronic 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_Medtronic_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Medtronic 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_Medtronic_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Juniper 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_Juniper_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```

```{r}
# Juniper 2018 -- word cloud of the most frequent terms in the tweet text
text <- df_Juniper_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```
```{r}

# Aetna Ramon 2018 (year per the df_AetnaR_2018 data frame used below) --
# word cloud of the most frequent terms in the tweet text
text <- df_AetnaR_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Netflix 2016 (year per the df_Netflix_2016 data frame used below) --
# word cloud of the most frequent terms in the tweet text
text <- df_Netflix_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Disney 2018 (year per the df_Disney_2018 data frame used below) --
# word cloud of the most frequent terms in the tweet text
text <- df_Disney_2018$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)

```
```{r}
# Fox 2015 -- word cloud of the most frequent terms in the tweet text
text <- df_Fox_2015$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}
# Microsoft 2016 -- word cloud of the most frequent terms in the tweet text
text <- df_Microsoft_2016$text

# One document per element of the text column
docs <- Corpus(VectorSource(text))

# Lowercase and drop stop words BEFORE stripping punctuation: the English
# stop-word list contains contractions such as "don't", which no longer match
# once the apostrophe has been removed.
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace)

# Total count of each term across all documents, most frequent first
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df_wc <- data.frame(word = names(words), freq = words)

# wordcloud() lays words out using the RNG; fix the seed so the figure is
# the same on every run.
set.seed(1234)
wordcloud(
  words = df_wc$word, freq = df_wc$freq, min.freq = 1, max.words = 200,
  random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2")
)
```

```{r}

```

